Format code

This commit is contained in:
taizan-hokuto
2020-06-04 23:10:26 +09:00
parent e6dbc8772e
commit 2474207691
50 changed files with 635 additions and 622 deletions

View File

@@ -28,3 +28,5 @@ from .api import (
SuperchatCalculator,
VideoInfo
)
# flake8: noqa

View File

@@ -14,3 +14,5 @@ from .processors.speed.calculator import SpeedCalculator
from .processors.superchat.calculator import SuperchatCalculator
from .tool.extract.extractor import Extractor
from .tool.videoinfo import VideoInfo
# flake8: noqa

View File

@@ -1,11 +1,7 @@
import argparse
import os
from pathlib import Path
from typing import List, Callable
from .arguments import Arguments
from .. exceptions import InvalidVideoIdException, NoContentsException
from .. processors.tsv_archiver import TSVArchiver
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
@@ -18,6 +14,8 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
def main():
# Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
@@ -47,7 +45,8 @@ def main():
path = Path(Arguments().output + video_id + '.html')
print(f"output path: {path.resolve()}")
Extractor(video_id,
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
processor=HTMLArchiver(
Arguments().output + video_id + '.html'),
callback=_disp_progress
).extract()
print("\nExtraction end.\n")
@@ -56,5 +55,6 @@ def main():
return
parser.print_help()
def _disp_progress(a, b):
    """Progress callback: emit a single dot and flush stdout.

    Both positional arguments are accepted and ignored; the function only
    prints ``'.'`` without a trailing newline.
    """
    print('.', end="", flush=True)

View File

@@ -8,6 +8,7 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
class Arguments(metaclass=Singleton):
"""
Arguments singleton

View File

@@ -4,6 +4,8 @@ Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
class Singleton(type):
"""
Abstract class for singletons

View File

@@ -1,11 +1,9 @@
import logging
from . import mylogger
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
def logger(module_name: str, loglevel=None):
    """Return the logger that ``mylogger.get_logger`` builds for *module_name*.

    ``loglevel`` is forwarded unchanged as the ``loglevel`` keyword argument.
    """
    return mylogger.get_logger(module_name, loglevel=loglevel)

View File

@@ -1,11 +1,11 @@
from logging import NullHandler, getLogger, StreamHandler, FileHandler, Formatter
from logging import NullHandler, getLogger, StreamHandler, FileHandler
import logging
from datetime import datetime
def get_logger(modname, loglevel=logging.DEBUG):
logger = getLogger(modname)
if loglevel == None:
if loglevel is None:
logger.addHandler(NullHandler())
return logger
logger.setLevel(loglevel)
@@ -22,10 +22,10 @@ def get_logger(modname,loglevel=logging.DEBUG):
handler2.setLevel(logging.ERROR)
handler2.setFormatter(my_formatter)
logger.addHandler(handler2)
return logger
class MyFormatter(logging.Formatter):
def format(self, record):
timestamp = (

View File

@@ -1,5 +1,7 @@
import asyncio
class Buffer(asyncio.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
@@ -10,6 +12,7 @@ class Buffer(asyncio.Queue):
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
'''
def __init__(self, maxsize=0):
super().__init__(maxsize)

View File

@@ -177,7 +177,8 @@ class LiveChatAsync:
}
time_mark = time.time()
if self._direct_mode:
processed_chat = self.processor.process([chat_component])
processed_chat = self.processor.process(
[chat_component])
if isinstance(processed_chat, tuple):
await self._callback(*processed_chat)
else:

View File

@@ -1,6 +1,7 @@
import queue
class Buffer(queue.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
@@ -11,6 +12,7 @@ class Buffer(queue.Queue):
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
'''
def __init__(self, maxsize=0):
super().__init__(maxsize=maxsize)

View File

@@ -4,18 +4,21 @@ class ChatParseException(Exception):
'''
pass
class NoYtinitialdataException(ChatParseException):
    '''
    Raised when the video cannot be found.
    '''
class ResponseContextError(ChatParseException):
    '''
    Raised when the chat data is invalid.
    '''
class NoLivechatRendererException(ChatParseException):
'''
Thrown when livechatRenderer is missing in JSON.
@@ -29,12 +32,14 @@ class NoContentsException(ChatParseException):
'''
pass
class NoContinuationsException(ChatParseException):
    '''
    Raised when ContinuationContents carries no continuation.
    '''
class IllegalFunctionCall(Exception):
'''
Thrown when get () is called even though
@@ -42,11 +47,13 @@ class IllegalFunctionCall(Exception):
'''
pass
class InvalidVideoIdException(Exception):
    '''
    Raised when the given video_id does not exist (VideoInfo).
    '''
class UnknownConnectionError(Exception):
    '''
    Plain Exception subclass that adds no behavior of its own.

    NOTE(review): the name suggests an unclassified connection failure;
    confirm against the raise sites.
    '''

View File

@@ -39,6 +39,7 @@ def _gen_vid_long(video_id):
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _gen_vid(video_id):
"""generate video_id parameter.
Parameter
@@ -64,6 +65,7 @@ def _gen_vid(video_id):
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _nval(val):
"""convert value to byte array"""
if val < 0:
@@ -113,9 +115,7 @@ def _build(video_id, seektime, topchat_only):
body = reduce(lambda x, y: x + y, body)
return urllib.parse.quote(
b64enc(header_magic +
_nval(len(body)) +
body
b64enc(header_magic + _nval(len(body)) + body
).decode()
)

View File

@@ -22,7 +22,8 @@ class Parser:
if jsn is None:
raise ChatParseException('Called with none JSON object.')
if jsn['response']['responseContext'].get('errors'):
raise ResponseContextError('The video_id would be wrong, or video is deleted or private.')
raise ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
contents = jsn['response'].get('continuationContents')
return contents
@@ -50,17 +51,18 @@ class Parser:
cont = contents['liveChatContinuation']['continuations'][0]
if cont is None:
raise NoContinuationsException('No Continuation')
metadata = (cont.get('invalidationContinuationData') or
cont.get('timedContinuationData') or
cont.get('reloadContinuationData') or
cont.get('liveChatReplayContinuationData')
metadata = (cont.get('invalidationContinuationData')
or cont.get('timedContinuationData')
or cont.get('reloadContinuationData')
or cont.get('liveChatReplayContinuationData')
)
if metadata is None:
if cont.get("playerSeekContinuationData"):
raise ChatParseException('Finished chat data')
unknown = list(cont.keys())[0]
if unknown:
raise ChatParseException(f"Received unknown continuation type:{unknown}")
raise ChatParseException(
f"Received unknown continuation type:{unknown}")
else:
raise ChatParseException('Cannot extract continuation data')
return self._create_data(metadata, contents)

View File

@@ -3,6 +3,7 @@ class ChatProcessor:
Abstract class that processes chat data.
Receive chat data (actions) from Listener.
'''
def process(self, chat_components: list):
'''
Interface that represents processing of chat data.
@@ -20,8 +21,3 @@ class ChatProcessor:
}
'''
pass

View File

@@ -1,5 +1,6 @@
from .chat_processor import ChatProcessor
class Combinator(ChatProcessor):
'''
Combinator combines multiple chat processors.
@@ -35,5 +36,3 @@ class Combinator(ChatProcessor):
'''
return tuple(processor.process(chat_components)
for processor in self.processors)

View File

@@ -1,5 +1,3 @@
import datetime
import time
from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer
@@ -58,7 +56,7 @@ class CompatibleProcessor(ChatProcessor):
rd = {}
try:
renderer = self.get_renderer(item)
if renderer == None:
if renderer is None:
return None
rd["kind"] = "youtube#liveChatMessage"

View File

@@ -1,11 +1,12 @@
import datetime, pytz
import datetime
import pytz
class BaseRenderer:
def __init__(self, item, chattype):
self.renderer = list(item.values())[0]
self.chattype = chattype
def get_snippet(self):
message = self.get_message(self.renderer)
@@ -22,7 +23,6 @@ class BaseRenderer:
}
}
def get_authordetails(self):
authorExternalChannelId = self.renderer.get("authorExternalChannelId")
# parse subscriber type
@@ -40,7 +40,6 @@ class BaseRenderer:
"isChatModerator": isChatModerator
}
def get_message(self, renderer):
message = ''
if renderer.get("message"):
@@ -80,4 +79,3 @@ class BaseRenderer:
dt = datetime.datetime.fromtimestamp(int(timestamp) / 1000000)
return dt.astimezone(pytz.utc).isoformat(
timespec='milliseconds').replace('+00:00', 'Z')

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "newSponsorEvent")
@@ -34,10 +36,7 @@ class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
"isChatModerator": isChatModerator
}
def get_message(self, renderer):
    """Build the message text as ``'<event text> / <detail text>'``.

    The event text is the ``text`` of the first run in
    ``renderer["eventText"]``; the detail text is
    ``renderer["detailText"]["simpleText"]``.
    """
    event_text = renderer["eventText"]["runs"][0]["text"]
    detail_text = renderer["detailText"]["simpleText"]
    return event_text + ' / ' + detail_text

View File

@@ -35,6 +35,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer):
}
def get_message(self, renderer):
message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]])
message = ''.join([mes.get("text", "")
for mes in renderer["headerSubtext"]["runs"]])
return message, [message]

View File

@@ -3,6 +3,7 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superChatEvent")

View File

@@ -3,6 +3,7 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superStickerEvent")
@@ -44,5 +45,3 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
symbol = ""
amountMicros = 0
return amountDisplayString, symbol, amountMicros

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "textMessageEvent")

View File

@@ -62,7 +62,7 @@ class DefaultProcessor(ChatProcessor):
return None
try:
renderer = self._get_renderer(item)
if renderer == None:
if renderer is None:
return None
renderer.get_snippet()

View File

@@ -1,6 +1,10 @@
from datetime import datetime
class Author:
pass
class BaseRenderer:
def __init__(self, item, chattype):
self.renderer = list(item.values())[0]
@@ -38,8 +42,6 @@ class BaseRenderer:
self.author.name = self.renderer["authorName"]["simpleText"]
self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"]
def get_message(self, renderer):
message = ''
message_ex = []
@@ -50,14 +52,13 @@ class BaseRenderer:
if r:
if r.get('emoji'):
message += r['emoji'].get('shortcuts', [''])[0]
message_ex.append(r['emoji']['image']['thumbnails'][1].get('url'))
message_ex.append(
r['emoji']['image']['thumbnails'][1].get('url'))
else:
message += r.get('text', '')
message_ex.append(r.get('text', ''))
return message, message_ex
def get_badges(self, renderer):
self.author.type = ''
isVerified = False
@@ -82,12 +83,9 @@ class BaseRenderer:
self.get_badgeurl(badge)
return isVerified, isChatOwner, isChatSponsor, isChatModerator
def get_badgeurl(self, badge):
    """Store the URL of the badge's first custom thumbnail on ``self.author.badgeUrl``."""
    badge_renderer = badge["liveChatAuthorBadgeRenderer"]
    first_thumbnail = badge_renderer["customThumbnail"]["thumbnails"][0]
    self.author.badgeUrl = first_thumbnail["url"]
def get_datetime(self, timestamp):
    """Format *timestamp* as a ``'%Y-%m-%d %H:%M:%S'`` string.

    The value is divided by 1,000,000 before being handed to
    ``datetime.fromtimestamp`` (presumably it is in microseconds since the
    epoch -- confirm against callers). No timezone is passed, so the result
    is expressed in local time.
    """
    seconds = timestamp / 1000000
    return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')

View File

@@ -1,18 +1,15 @@
from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
    """Renderer registered with the chat type ``"newSponsor"``."""

    def __init__(self, item):
        super().__init__(item, "newSponsor")

    def get_authordetails(self):
        """Run the base-class author handling, then flag the author as a sponsor."""
        super().get_authordetails()
        self.author.isChatSponsor = True

    def get_message(self, renderer):
        """Build the message text as ``'<event text> / <detail text>'``.

        NOTE(review): this returns a bare string, while other ``get_message``
        implementations in this package appear to return a
        ``(message, list)`` pair -- confirm what the caller expects.
        """
        event_text = renderer["eventText"]["runs"][0]["text"]
        detail_text = renderer["detailText"]["simpleText"]
        return event_text + ' / ' + detail_text

View File

@@ -10,6 +10,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer):
self.author.isChatSponsor = True
def get_message(self, renderer):
message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]])
message = ''.join([mes.get("text", "")
for mes in renderer["headerSubtext"]["runs"]])
return message, [message]

View File

@@ -3,11 +3,11 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superChat")
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount = (
@@ -15,11 +15,10 @@ class LiveChatPaidMessageRenderer(BaseRenderer):
)
self.amountValue = amount
self.amountString = amountDisplayString
self.currency= currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("bodyBackgroundColor", 0)
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)

View File

@@ -3,11 +3,11 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superSticker")
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount = (
@@ -15,11 +15,11 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
)
self.amountValue = amount
self.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("moneyChipBackgroundColor", 0)
self.sticker = "https:"+self.renderer["sticker"]["thumbnails"][0]["url"]
self.sticker = "https:" + \
self.renderer["sticker"]["thumbnails"][0]["url"]
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
@@ -31,9 +31,3 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
symbol = ""
amount = 0.0
return amountDisplayString, symbol, amount

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "textMessage")

View File

@@ -1,8 +1,10 @@
from .chat_processor import ChatProcessor
class DummyProcessor(ChatProcessor):
    '''
    Pass-through processor: hands back the chat components it receives.
    '''

    def process(self, chat_components: list) -> list:
        '''Return `chat_components` exactly as given, without copying.'''
        return chat_components

View File

@@ -1,18 +1,18 @@
import csv
import os
import re
from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
,'type','authorChannel']
fmt_headers = ['datetime', 'elapsed', 'authorName',
'message', 'superchat', 'type', 'authorChannel']
HEADER_HTML = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
'''
class HTMLArchiver(ChatProcessor):
'''
HtmlArchiver saves chat data as HTML table format.
@@ -95,4 +95,3 @@ class HTMLArchiver(ChatProcessor):
html += '</tr>\n'
html += '</thead>\n'
return html

View File

@@ -1,4 +1,3 @@
import datetime
import json
import os
import re
@@ -6,6 +5,7 @@ from .chat_processor import ChatProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
class JsonfileArchiver(ChatProcessor):
"""
JsonfileArchiver saves chat data as text of JSON lines.
@@ -17,6 +17,7 @@ class JsonfileArchiver(ChatProcessor):
it is automatically saved under a different name
with suffix '(number)'
"""
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
@@ -32,14 +33,18 @@ class JsonfileArchiver(ChatProcessor):
total_lines : int :
count of total lines written to the file.
"""
if chat_components is None: return
if chat_components is None:
return
with open(self.save_path, mode='a', encoding='utf-8') as f:
for component in chat_components:
if component is None: continue
if component is None:
continue
chatdata = component.get('chatdata')
if chatdata is None: continue
if chatdata is None:
continue
for action in chatdata:
if action is None: continue
if action is None:
continue
json_line = json.dumps(action, ensure_ascii=False)
f.writelines(json_line + '\n')
self.line_counter += 1
@@ -62,5 +67,3 @@ class JsonfileArchiver(ChatProcessor):
body = f'{body}({str(counter)})'
newpath = os.path.join(os.path.dirname(filepath), body + extention)
return newpath

View File

@@ -1,10 +1,6 @@
import json
import os
import traceback
import datetime
import time
from .chat_processor import ChatProcessor
##version 2
class SimpleDisplayProcessor(ChatProcessor):
def process(self, chat_components: list):
@@ -17,28 +13,34 @@ class SimpleDisplayProcessor(ChatProcessor):
timeout += component.get('timeout', 0)
chatdata = component.get('chatdata')
if chatdata is None:break
if chatdata is None:
break
for action in chatdata:
if action is None:continue
if action.get('addChatItemAction') is None:continue
if action['addChatItemAction'].get('item') is None:continue
if action is None:
continue
if action.get('addChatItemAction') is None:
continue
if action['addChatItemAction'].get('item') is None:
continue
root = action['addChatItemAction']['item'].get('liveChatTextMessageRenderer')
root = action['addChatItemAction']['item'].get(
'liveChatTextMessageRenderer')
if root:
author_name = root['authorName']['simpleText']
message = self._parse_message(root.get('message'))
purchase_amount_text = ''
else:
root = ( action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer') or
action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer') )
root = (action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer')
or action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer'))
if root:
author_name = root['authorName']['simpleText']
message = self._parse_message(root.get('message'))
purchase_amount_text = root['purchaseAmountText']['simpleText']
else:
continue
chatlist.append(f'[{author_name}]: {message} {purchase_amount_text}')
chatlist.append(
f'[{author_name}]: {message} {purchase_amount_text}')
return {"timeout": timeout, "chatlist": chatlist}
def _parse_message(self, message):
@@ -57,5 +59,3 @@ class SimpleDisplayProcessor(ChatProcessor):
return tmp
else:
return ''

View File

@@ -5,6 +5,8 @@ Calculate speed of chat.
"""
import time
from .. chat_processor import ChatProcessor
class RingQueue:
"""
リング型キュー
@@ -77,6 +79,7 @@ class RingQueue:
def item_count(self):
return len(self.items)
class SpeedCalculator(ChatProcessor, RingQueue):
"""
チャットの勢いを計算する。
@@ -106,7 +109,6 @@ class SpeedCalculator(ChatProcessor, RingQueue):
self.speed = self._calc_speed()
return self.speed
def _calc_speed(self):
"""
RingQueue内のチャット勢い算出用データリストを元に、
@@ -120,8 +122,7 @@ class SpeedCalculator(ChatProcessor, RingQueue):
# キュー内の総チャット数
total = sum(item['chat_count'] for item in self.items)
# キュー内の最初と最後のチャットの時間差
duration = (self.items[self.last_pos]['endtime']
- self.items[self.first_pos]['starttime'])
duration = (self.items[self.last_pos]['endtime'] - self.items[self.first_pos]['starttime'])
if duration != 0:
return int(total * 60 / duration)
return 0
@@ -200,4 +201,3 @@ class SpeedCalculator(ChatProcessor, RingQueue):
'starttime': int(starttime / 1000000),
'endtime': int(endtime / 1000000)
})

View File

@@ -15,10 +15,12 @@ items_sticker = [
'liveChatPaidStickerRenderer'
]
class SuperchatCalculator(ChatProcessor):
"""
Calculate the amount of SuperChat by currency.
"""
def __init__(self):
self.results = {}
@@ -34,11 +36,13 @@ class SuperchatCalculator(ChatProcessor):
return self.results
for component in chat_components:
chatdata = component.get('chatdata')
if chatdata is None: continue
if chatdata is None:
continue
for action in chatdata:
renderer = self._get_item(action, items_paid) or \
self._get_item(action, items_sticker)
if renderer is None: continue
if renderer is None:
continue
symbol, amount = self._parse(renderer)
self.results.setdefault(symbol, 0)
self.results[symbol] += amount
@@ -69,6 +73,3 @@ class SuperchatCalculator(ChatProcessor):
continue
return None
return dict_body

View File

@@ -5,8 +5,9 @@ from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchatAmount'
,'authorType','authorChannel']
fmt_headers = ['datetime', 'elapsed', 'authorName', 'message',
'superchatAmount', 'authorType', 'authorChannel']
class TSVArchiver(ChatProcessor):
'''
@@ -64,7 +65,3 @@ class TSVArchiver(ChatProcessor):
c.author.type,
c.author.channelId
])

View File

@@ -17,6 +17,7 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
def _split(start, end, count, min_interval_sec=120):
"""
Split section from `start` to `end` into `count` pieces,
@@ -28,7 +29,6 @@ def _split(start, end, count, min_interval_sec = 120):
--------
List of the offset of each block's first chat data.
"""
if not (isinstance(start, int) or isinstance(start, float)) or \
not (isinstance(end, int) or isinstance(end, float)):
raise ValueError("start/end must be int or float")
@@ -40,7 +40,8 @@ def _split(start, end, count, min_interval_sec = 120):
raise ValueError("count must be equal to or greater than 1.")
if (end - start) / count < min_interval_sec:
count = int((end - start) / min_interval_sec)
if count == 0 : count = 1
if count == 0:
count = 1
interval = (end - start) / count
if count == 1:
@@ -48,8 +49,10 @@ def _split(start, end, count, min_interval_sec = 120):
return sorted(list(set([int(start + interval * j)
for j in range(count)])))
def ready_blocks(video_id, duration, div, callback):
if div <= 0: raise ValueError
if div <= 0:
raise ValueError
async def _get_blocks(video_id, duration, div, callback):
async with aiohttp.ClientSession() as session:
@@ -92,6 +95,7 @@ def ready_blocks(video_id, duration, div, callback):
_get_blocks(video_id, duration, div, callback))
return blocks
def fetch_patch(callback, blocks, video_id):
async def _allocate_workers():
@@ -137,6 +141,7 @@ def fetch_patch(callback, blocks, video_id):
except CancelledError:
pass
async def _shutdown():
print("\nshutdown...")
tasks = [t for t in asyncio.all_tasks()
@@ -148,7 +153,7 @@ async def _shutdown():
except asyncio.CancelledError:
pass
def cancel():
    """Schedule ``_shutdown()`` as a task on the current event loop."""
    asyncio.get_event_loop().create_task(_shutdown())

View File

@@ -1,4 +1,3 @@
from . import parser
class Block:
"""Block object represents something like a box
to join chunk of chatdata.

View File

@@ -1,5 +1,6 @@
from . import parser
def check_duplicate(chatdata):
max_range = len(chatdata) - 1
tbl_offset = [None] * max_range
@@ -15,10 +16,8 @@ def check_duplicate(chatdata):
def is_duplicate(i, j):
return (
tbl_offset[i] == tbl_offset[j]
and
tbl_id[i] == tbl_id[j]
and
tbl_type[i] == tbl_type[j]
and tbl_id[i] == tbl_id[j]
and tbl_type[i] == tbl_type[j]
)
print("creating table...")
create_table(chatdata, max_range)
@@ -53,8 +52,7 @@ def check_duplicate_offset(chatdata):
def is_duplicate(i, j):
return (
tbl_offset[i] == tbl_offset[j]
and
tbl_id[i] == tbl_id[j]
and tbl_id[i] == tbl_id[j]
)
print("creating table...")
@@ -69,6 +67,7 @@ def check_duplicate_offset(chatdata):
for i in range(max_range - 1)
if is_duplicate(i, i + 1)]
def remove_duplicate_head(blocks):
if len(blocks) == 0 or len(blocks) == 1:
return blocks
@@ -86,17 +85,16 @@ def remove_duplicate_head(blocks):
type_1 = parser.get_type(blocks[index + 1].chat_data[0])
return (
blocks[index].first == blocks[index + 1].first
and
id_0 == id_1
and
type_0 == type_1
and id_0 == id_1
and type_0 == type_1
)
ret = [blocks[i] for i in range(len(blocks) - 1)
if (len(blocks[i].chat_data)>0 and
not is_duplicate_head(i) )]
if (len(blocks[i].chat_data) > 0
and not is_duplicate_head(i))]
ret.append(blocks[-1])
return ret
def remove_duplicate_tail(blocks):
if len(blocks) == 0 or len(blocks) == 1:
return blocks
@@ -112,16 +110,15 @@ def remove_duplicate_tail(blocks):
type_1 = parser.get_type(blocks[index].chat_data[-1])
return (
blocks[index - 1].last == blocks[index].last
and
id_0 == id_1
and
type_0 == type_1
and id_0 == id_1
and type_0 == type_1
)
ret = [blocks[i] for i in range(0, len(blocks))
if i == 0 or not is_duplicate_tail(i)]
return ret
def remove_overlap(blocks):
"""
Fix overlapped blocks after ready_blocks().
@@ -149,8 +146,8 @@ def remove_overlap(blocks):
return blocks
def _dump(blocks):
print(f"---------- first last end---")
print("---------- first last end---")
for i, block in enumerate(blocks):
print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")
print(
f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")

View File

@@ -1,6 +1,5 @@
from . import asyncdl
from . import duplcheck
from . import parser
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException
@@ -8,6 +7,7 @@ from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers = config.headers
class Extractor:
def __init__(self, video_id, div=1, callback=None, processor=None):
if not isinstance(div, int) or div < 1:
@@ -84,8 +84,9 @@ class Extractor:
if self.processor is None:
return data
return self.processor.process(
[{'video_id':None,'timeout':1,'chatdata' : (action
["replayChatItemAction"]["actions"][0] for action in data)}]
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
def cancel(self):

View File

@@ -1,4 +1,3 @@
import json
from ... import config
from ... exceptions import (
ResponseContextError,
@@ -7,6 +6,7 @@ from ... exceptions import (
logger = config.logger(__name__)
def parse(jsn):
"""
Parse replay chat data.
@@ -43,12 +43,12 @@ def parse(jsn):
def get_offset(item):
    """Return the video offset of a replay chat item as an ``int``.

    Reads ``item['replayChatItemAction']['videoOffsetTimeMsec']`` and
    converts it with ``int``.
    """
    replay_action = item['replayChatItemAction']
    offset_msec = replay_action["videoOffsetTimeMsec"]
    return int(offset_msec)
def get_id(item):
    """Return the ``id`` of the renderer carried by a replay chat item.

    Takes the first action of ``replayChatItemAction``, the first value of
    that action, and the first value of its ``'item'`` mapping; the result
    is that mapping's ``'id'`` entry, or ``None`` when it has none.
    """
    first_action = item['replayChatItemAction']["actions"][0]
    action_body = list(first_action.values())[0]
    renderer = list(action_body['item'].values())[0]
    return renderer.get('id')
def get_type(item):
    """Return the renderer key name carried by a replay chat item.

    Takes the first action of ``replayChatItemAction``, the first value of
    that action, and returns the first key of its ``'item'`` mapping.
    """
    first_action = item['replayChatItemAction']["actions"][0]
    action_body = list(first_action.values())[0]
    renderer_names = list(action_body['item'].keys())
    return renderer_names[0]

View File

@@ -2,6 +2,7 @@ from . import parser
from . block import Block
from typing import NamedTuple
class Patch(NamedTuple):
"""
Patch represents chunk of chat data
@@ -12,6 +13,7 @@ class Patch(NamedTuple):
first: int = None
last: int = None
def fill(block: Block, patch: Patch):
block_end = block.end
if patch.last < block_end or block.is_last:
@@ -51,4 +53,3 @@ def set_patch(block:Block, patch:Patch):
block.chat_data.extend(patch.chats)
block.last = patch.last
block.remaining = block.end - block.last

View File

@@ -1,8 +1,8 @@
from . import parser
from . block import Block
from . patch import Patch, fill, split
from . patch import fill, split
from ... paramgen import arcparam
class ExtractWorker:
"""
ExtractWorker associates a download session with a block.
@@ -54,6 +54,7 @@ class ExtractWorker:
self.block.done = True
self.block = _search_new_block(self)
def _search_new_block(worker) -> Block:
index, undone_block = _get_undone_block(worker.blocks)
if undone_block is None:
@@ -74,6 +75,7 @@ def _search_new_block(worker) -> Block:
worker.blocks.insert(index + 1, new_block)
return new_block
def _get_undone_block(blocks) -> (int, Block):
min_interval_ms = 120000
max_remaining = 0

View File

@@ -2,7 +2,6 @@ import json
import re
import requests
from .. import config
from .. import util
from ..exceptions import InvalidVideoIdException
headers = config.headers
@@ -63,6 +62,7 @@ item_moving_thumbnail = [
"url"
]
class VideoInfo:
'''
VideoInfo object retrieves YouTube video information.
@@ -76,6 +76,7 @@ class VideoInfo:
InvalidVideoIdException :
Occurs when video_id does not exist on YouTube.
'''
def __init__(self, video_id):
self.video_id = video_id
text = self._get_page_text(video_id)

View File

@@ -1,6 +1,9 @@
import requests,json,datetime
import requests
import json
import datetime
from .. import config
def extract(url):
_session = requests.Session()
html = _session.get(url, headers=config.headers)
@@ -10,6 +13,6 @@ def extract(url):
def save(data, filename, extention):
with open(filename+"_"+(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
)+extention,mode ='w',encoding='utf-8') as f:
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
mode='w', encoding='utf-8') as f:
f.writelines(data)