diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 7c0b474..86bf198 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -27,4 +27,6 @@ from .api import ( SpeedCalculator, SuperchatCalculator, VideoInfo -) \ No newline at end of file +) + +# flake8: noqa \ No newline at end of file diff --git a/pytchat/api.py b/pytchat/api.py index ceb4da2..7c67436 100644 --- a/pytchat/api.py +++ b/pytchat/api.py @@ -14,3 +14,5 @@ from .processors.speed.calculator import SpeedCalculator from .processors.superchat.calculator import SuperchatCalculator from .tool.extract.extractor import Extractor from .tool.videoinfo import VideoInfo + +# flake8: noqa \ No newline at end of file diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index c3c16ac..060b4eb 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -1,33 +1,31 @@ import argparse -import os from pathlib import Path -from typing import List, Callable from .arguments import Arguments - from .. exceptions import InvalidVideoIdException, NoContentsException -from .. processors.tsv_archiver import TSVArchiver from .. processors.html_archiver import HTMLArchiver from .. tool.extract.extractor import Extractor from .. tool.videoinfo import VideoInfo from .. import __version__ ''' -Most of CLI modules refer to +Most of CLI modules refer to Petter Kraabøl's Twitch-Chat-Downloader https://github.com/PetterKraabol/Twitch-Chat-Downloader (MIT License) ''' + + def main(): - # Arguments + # Arguments parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, - help='Video IDs separated by commas without space.\n' - 'If ID starts with a hyphen (-), enclose the ID in square brackets.') + help='Video IDs separated by commas without space.\n' + 'If ID starts with a hyphen (-), enclose the ID in square brackets.') parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, - help='Output directory (end with "/"). default="./"', default='./') + help='Output directory (end with "/"). default="./"', default='./') parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', - help='Settings version') + help='Settings version') Arguments(parser.parse_args().__dict__) if Arguments().print_version: print(f'pytchat v{__version__}') @@ -37,24 +35,26 @@ def main(): if Arguments().video_ids: for video_id in Arguments().video_ids: if '[' in video_id: - video_id = video_id.replace('[','').replace(']','') + video_id = video_id.replace('[', '').replace(']', '') try: info = VideoInfo(video_id) print(f"Extracting...\n" f" video_id: {video_id}\n" f" channel: {info.get_channel_name()}\n" f" title: {info.get_title()}") - path = Path(Arguments().output+video_id+'.html') + path = Path(Arguments().output + video_id + '.html') print(f"output path: {path.resolve()}") - Extractor(video_id, - processor = HTMLArchiver(Arguments().output+video_id+'.html'), - callback = _disp_progress - ).extract() + Extractor(video_id, + processor=HTMLArchiver( + Arguments().output + video_id + '.html'), + callback=_disp_progress + ).extract() print("\nExtraction end.\n") except (InvalidVideoIdException, NoContentsException) as e: print(e) return parser.print_help() -def _disp_progress(a,b): - print('.',end="",flush=True) + +def _disp_progress(a, b): + print('.', end="", flush=True) diff --git a/pytchat/cli/arguments.py b/pytchat/cli/arguments.py index ab3f355..d6fea2b 100644 --- a/pytchat/cli/arguments.py +++ b/pytchat/cli/arguments.py @@ -2,12 +2,13 @@ from typing import Optional, Dict, Union, List from .singleton import Singleton ''' -This modules refer to +This modules refer to Petter Kraabøl's Twitch-Chat-Downloader https://github.com/PetterKraabol/Twitch-Chat-Downloader (MIT License) ''' + class Arguments(metaclass=Singleton): """ Arguments singleton @@ -18,11 +19,11 @@ class Arguments(metaclass=Singleton): OUTPUT: str = 'output' VIDEO: str = 'video' - def __init__(self, - arguments: Optional[Dict[str, Union[str, bool, int]]] = None): + def __init__(self, + arguments: Optional[Dict[str, Union[str, bool, int]]] = None): """ Initialize arguments - :param arguments: Arguments from cli + :param arguments: Arguments from cli (Optional to call singleton instance without parameters) """ @@ -35,5 +36,5 @@ class Arguments(metaclass=Singleton): self.video_ids: List[int] = [] # Videos if arguments[Arguments.Name.VIDEO]: - self.video_ids = [video_id - for video_id in arguments[Arguments.Name.VIDEO].split(',')] + self.video_ids = [video_id + for video_id in arguments[Arguments.Name.VIDEO].split(',')] diff --git a/pytchat/cli/singleton.py b/pytchat/cli/singleton.py index fdf1c2c..53a76f0 100644 --- a/pytchat/cli/singleton.py +++ b/pytchat/cli/singleton.py @@ -1,9 +1,11 @@ ''' -This modules refer to +This modules refer to Petter Kraabøl's Twitch-Chat-Downloader https://github.com/PetterKraabol/Twitch-Chat-Downloader (MIT License) ''' + + class Singleton(type): """ Abstract class for singletons @@ -16,4 +18,4 @@ class Singleton(type): return cls._instances[cls] def get_instance(cls, *args, **kwargs): - cls.__call__(*args, **kwargs) \ No newline at end of file + cls.__call__(*args, **kwargs) diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index a36d2b0..e24eb3f 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -1,11 +1,9 @@ -import logging from . import mylogger headers = { - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'} + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'} -def logger(module_name: str, loglevel = None): - module_logger = mylogger.get_logger(module_name, loglevel = loglevel) + +def logger(module_name: str, loglevel=None): + module_logger = mylogger.get_logger(module_name, loglevel=loglevel) return module_logger - - diff --git a/pytchat/config/mylogger.py b/pytchat/config/mylogger.py index 3df3fb6..d61e40a 100644 --- a/pytchat/config/mylogger.py +++ b/pytchat/config/mylogger.py @@ -1,31 +1,31 @@ -from logging import NullHandler, getLogger, StreamHandler, FileHandler, Formatter +from logging import NullHandler, getLogger, StreamHandler, FileHandler import logging from datetime import datetime -def get_logger(modname,loglevel=logging.DEBUG): +def get_logger(modname, loglevel=logging.DEBUG): logger = getLogger(modname) - if loglevel == None: + if loglevel is None: logger.addHandler(NullHandler()) return logger logger.setLevel(loglevel) - #create handler1 for showing info + # create handler1 for showing info handler1 = StreamHandler() - my_formatter = MyFormatter() + my_formatter = MyFormatter() handler1.setFormatter(my_formatter) - handler1.setLevel(loglevel) + handler1.setLevel(loglevel) logger.addHandler(handler1) - #create handler2 for recording log file + # create handler2 for recording log file if loglevel <= logging.DEBUG: handler2 = FileHandler(filename="log.txt", encoding='utf-8') handler2.setLevel(logging.ERROR) handler2.setFormatter(my_formatter) - logger.addHandler(handler2) return logger + class MyFormatter(logging.Formatter): def format(self, record): timestamp = ( @@ -35,4 +35,4 @@ class MyFormatter(logging.Formatter): lineno = str(record.lineno).rjust(4) message = record.getMessage() - return timestamp+'| '+module+' { '+funcname+':'+lineno+'} - '+message + return timestamp + '| ' + module + ' { ' + funcname + ':' + lineno + '} - ' + message diff --git a/pytchat/core_async/buffer.py b/pytchat/core_async/buffer.py index e93357e..9fbaac9 100644 --- a/pytchat/core_async/buffer.py +++ b/pytchat/core_async/buffer.py @@ -1,5 +1,7 @@ import asyncio + + class Buffer(asyncio.Queue): ''' チャットデータを格納するバッファの役割を持つFIFOキュー @@ -10,19 +12,20 @@ class Buffer(asyncio.Queue): 格納するチャットブロックの最大個数。0の場合は無限。 最大値を超える場合は古いチャットブロックから破棄される。 ''' - def __init__(self,maxsize = 0): + + def __init__(self, maxsize=0): super().__init__(maxsize) - - async def put(self,item): + + async def put(self, item): if item is None: - return + return if super().full(): super().get_nowait() await super().put(item) - def put_nowait(self,item): + def put_nowait(self, item): if item is None: - return + return if super().full(): super().get_nowait() super().put_nowait(item) @@ -32,4 +35,4 @@ class Buffer(asyncio.Queue): ret.append(await super().get()) while not super().empty(): ret.append(super().get_nowait()) - return ret \ No newline at end of file + return ret diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 29ae9d7..1853025 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -169,7 +169,7 @@ class LiveChatAsync: continuation, session, headers) metadata, chatdata = self._parser.parse(contents) - timeout = metadata['timeoutMs']/1000 + timeout = metadata['timeoutMs'] / 1000 chat_component = { "video_id": self.video_id, "timeout": timeout, @@ -177,14 +177,15 @@ class LiveChatAsync: } time_mark = time.time() if self._direct_mode: - processed_chat = self.processor.process([chat_component]) + processed_chat = self.processor.process( + [chat_component]) if isinstance(processed_chat, tuple): await self._callback(*processed_chat) else: await self._callback(processed_chat) else: await self._buffer.put(chat_component) - diff_time = timeout - (time.time()-time_mark) + diff_time = timeout - (time.time() - time_mark) await asyncio.sleep(diff_time) continuation = metadata.get('continuation') except ChatParseException as e: diff --git a/pytchat/core_multithread/buffer.py b/pytchat/core_multithread/buffer.py index 3898572..966f2e9 100644 --- a/pytchat/core_multithread/buffer.py +++ b/pytchat/core_multithread/buffer.py @@ -1,6 +1,7 @@ import queue + class Buffer(queue.Queue): ''' チャットデータを格納するバッファの役割を持つFIFOキュー @@ -11,28 +12,29 @@ class Buffer(queue.Queue): 格納するチャットブロックの最大個数。0の場合は無限。 最大値を超える場合は古いチャットブロックから破棄される。 ''' - def __init__(self,maxsize = 0): + + def __init__(self, maxsize=0): super().__init__(maxsize=maxsize) - - def put(self,item): + + def put(self, item): if item is None: - return + return if super().full(): super().get_nowait() else: super().put(item) - - def put_nowait(self,item): + + def put_nowait(self, item): if item is None: - return + return if super().full(): super().get_nowait() else: super().put_nowait(item) - + def get(self): ret = [] ret.append(super().get()) while not super().empty(): ret.append(super().get()) - return ret \ No newline at end of file + return ret diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index 5003331..c1aa0ad 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -156,7 +156,7 @@ class LiveChat: continuation, session, headers) metadata, chatdata = self._parser.parse(contents) - timeout = metadata['timeoutMs']/1000 + timeout = metadata['timeoutMs'] / 1000 chat_component = { "video_id": self.video_id, "timeout": timeout, @@ -172,7 +172,7 @@ class LiveChat: self._callback(processed_chat) else: self._buffer.put(chat_component) - diff_time = timeout - (time.time()-time_mark) + diff_time = timeout - (time.time() - time_mark) time.sleep(diff_time if diff_time > 0 else 0) continuation = metadata.get('continuation') except ChatParseException as e: diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index 11e5ba1..1f45829 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -4,18 +4,21 @@ class ChatParseException(Exception): ''' pass + class NoYtinitialdataException(ChatParseException): ''' Thrown when the video is not found. ''' pass + class ResponseContextError(ChatParseException): ''' Thrown when chat data is invalid. ''' pass + class NoLivechatRendererException(ChatParseException): ''' Thrown when livechatRenderer is missing in JSON. @@ -29,24 +32,28 @@ class NoContentsException(ChatParseException): ''' pass + class NoContinuationsException(ChatParseException): ''' Thrown when continuation is missing in ContinuationContents. ''' pass + class IllegalFunctionCall(Exception): ''' - Thrown when get () is called even though + Thrown when get () is called even though set_callback () has been executed. ''' pass + class InvalidVideoIdException(Exception): ''' Thrown when the video_id is not exist (VideoInfo). ''' pass + class UnknownConnectionError(Exception): - pass \ No newline at end of file + pass diff --git a/pytchat/paramgen/arcparam.py b/pytchat/paramgen/arcparam.py index a8048d5..c5f7e07 100644 --- a/pytchat/paramgen/arcparam.py +++ b/pytchat/paramgen/arcparam.py @@ -32,7 +32,7 @@ def _build(video_id, seektime, topchat_only) -> str: elif seektime == 0: timestamp = 1 else: - timestamp = int(seektime*1000000) + timestamp = int(seektime * 1000000) continuation = Continuation() entity = continuation.entity entity.header = _gen_vid(video_id) diff --git a/pytchat/paramgen/arcparam_mining.py b/pytchat/paramgen/arcparam_mining.py index d24deed..7e3525a 100644 --- a/pytchat/paramgen/arcparam_mining.py +++ b/pytchat/paramgen/arcparam_mining.py @@ -36,9 +36,10 @@ def _gen_vid_long(video_id): ] return urllib.parse.quote( - b64enc(reduce(lambda x, y: x+y, item)).decode() + b64enc(reduce(lambda x, y: x + y, item)).decode() ).encode() + def _gen_vid(video_id): """generate video_id parameter. Parameter @@ -50,7 +51,7 @@ def _gen_vid(video_id): bytes : base64 encoded video_id parameter. """ header_magic = b'\x0A\x0F\x1A\x0D\x0A' - header_id = video_id.encode() + header_id = video_id.encode() header_terminator = b'\x20\x01' item = [ @@ -61,9 +62,10 @@ def _gen_vid(video_id): ] return urllib.parse.quote( - b64enc(reduce(lambda x, y: x+y, item)).decode() + b64enc(reduce(lambda x, y: x + y, item)).decode() ).encode() + def _nval(val): """convert value to byte array""" if val < 0: @@ -84,19 +86,19 @@ def _build(video_id, seektime, topchat_only): if seektime == 0: times = b'' else: - times = _nval(int(seektime*1000)) + times = _nval(int(seektime * 1000)) if seektime > 0: - _len_time = b'\x5A' + (len(times)+1).to_bytes(1, 'big') + b'\x10' + _len_time = b'\x5A' + (len(times) + 1).to_bytes(1, 'big') + b'\x10' else: _len_time = b'' - + header_magic = b'\xA2\x9D\xB0\xD3\x04' - sep_0 = b'\x1A' - vid = _gen_vid(video_id) - _tag = b'\x40\x01' - timestamp1 = times - sep_1 = b'\x60\x04\x72\x02\x08' - terminator = b'\x78\x01' + sep_0 = b'\x1A' + vid = _gen_vid(video_id) + _tag = b'\x40\x01' + timestamp1 = times + sep_1 = b'\x60\x04\x72\x02\x08' + terminator = b'\x78\x01' body = [ sep_0, @@ -110,14 +112,12 @@ def _build(video_id, seektime, topchat_only): terminator ] - body = reduce(lambda x, y: x+y, body) + body = reduce(lambda x, y: x + y, body) return urllib.parse.quote( - b64enc(header_magic + - _nval(len(body)) + - body - ).decode() - ) + b64enc(header_magic + _nval(len(body)) + body + ).decode() + ) def getparam(video_id, seektime=0.0, topchat_only=False): diff --git a/pytchat/paramgen/liveparam.py b/pytchat/paramgen/liveparam.py index e0525fa..717443f 100644 --- a/pytchat/paramgen/liveparam.py +++ b/pytchat/paramgen/liveparam.py @@ -68,12 +68,12 @@ def _build(video_id, ts1, ts2, ts3, ts4, ts5, topchat_only) -> str: def _times(past_sec): n = int(time.time()) - _ts1 = n - random.uniform(0, 1*3) + _ts1 = n - random.uniform(0, 1 * 3) _ts2 = n - random.uniform(0.01, 0.99) _ts3 = n - past_sec + random.uniform(0, 1) - _ts4 = n - random.uniform(10*60, 60*60) + _ts4 = n - random.uniform(10 * 60, 60 * 60) _ts5 = n - random.uniform(0.01, 0.99) - return list(map(lambda x: int(x*1000000), [_ts1, _ts2, _ts3, _ts4, _ts5])) + return list(map(lambda x: int(x * 1000000), [_ts1, _ts2, _ts3, _ts4, _ts5])) def getparam(video_id, past_sec=0, topchat_only=False) -> str: diff --git a/pytchat/parser/live.py b/pytchat/parser/live.py index aa30562..5fd6bdb 100644 --- a/pytchat/parser/live.py +++ b/pytchat/parser/live.py @@ -22,7 +22,8 @@ class Parser: if jsn is None: raise ChatParseException('Called with none JSON object.') if jsn['response']['responseContext'].get('errors'): - raise ResponseContextError('The video_id would be wrong, or video is deleted or private.') + raise ResponseContextError( + 'The video_id would be wrong, or video is deleted or private.') contents = jsn['response'].get('continuationContents') return contents @@ -50,17 +51,18 @@ class Parser: cont = contents['liveChatContinuation']['continuations'][0] if cont is None: raise NoContinuationsException('No Continuation') - metadata = (cont.get('invalidationContinuationData') or - cont.get('timedContinuationData') or - cont.get('reloadContinuationData') or - cont.get('liveChatReplayContinuationData') + metadata = (cont.get('invalidationContinuationData') + or cont.get('timedContinuationData') + or cont.get('reloadContinuationData') + or cont.get('liveChatReplayContinuationData') ) if metadata is None: if cont.get("playerSeekContinuationData"): raise ChatParseException('Finished chat data') unknown = list(cont.keys())[0] if unknown: - raise ChatParseException(f"Received unknown continuation type:{unknown}") + raise ChatParseException( + f"Received unknown continuation type:{unknown}") else: raise ChatParseException('Cannot extract continuation data') return self._create_data(metadata, contents) diff --git a/pytchat/processors/chat_processor.py b/pytchat/processors/chat_processor.py index 6e62114..98d2227 100644 --- a/pytchat/processors/chat_processor.py +++ b/pytchat/processors/chat_processor.py @@ -3,11 +3,12 @@ class ChatProcessor: Abstract class that processes chat data. Receive chat data (actions) from Listener. ''' + def process(self, chat_components: list): ''' Interface that represents processing of chat data. - Called from LiveChat object. - + Called from LiveChat object. + Parameter ---------- chat_components: List[component] @@ -20,8 +21,3 @@ class ChatProcessor: } ''' pass - - - - - diff --git a/pytchat/processors/combinator.py b/pytchat/processors/combinator.py index c3a81b7..7784418 100644 --- a/pytchat/processors/combinator.py +++ b/pytchat/processors/combinator.py @@ -1,5 +1,6 @@ from .chat_processor import ChatProcessor + class Combinator(ChatProcessor): ''' Combinator combines multiple chat processors. @@ -8,11 +9,11 @@ class Combinator(ChatProcessor): For example: [constructor] chat = LiveChat("video_id", processor = ( Processor1(), Processor2(), Processor3() ) ) - + [receive return values] ret1, ret2, ret3 = chat.get() - - The return values are tuple of processed chat data, + + The return values are tuple of processed chat data, the order of return depends on parameter order. Parameter @@ -34,6 +35,4 @@ class Combinator(ChatProcessor): Tuple of chat data processed by each chat processor. ''' return tuple(processor.process(chat_components) - for processor in self.processors) - - + for processor in self.processors) diff --git a/pytchat/processors/compatible/processor.py b/pytchat/processors/compatible/processor.py index 23c5ef0..6b443e0 100644 --- a/pytchat/processors/compatible/processor.py +++ b/pytchat/processors/compatible/processor.py @@ -1,5 +1,3 @@ -import datetime -import time from .renderer.textmessage import LiveChatTextMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer @@ -39,7 +37,7 @@ class CompatibleProcessor(ChatProcessor): chat = self.parse(action) if chat: chatlist.append(chat) - ret["pollingIntervalMillis"] = int(timeout*1000) + ret["pollingIntervalMillis"] = int(timeout * 1000) ret["pageInfo"] = { "totalResults": len(chatlist), "resultsPerPage": len(chatlist), @@ -58,7 +56,7 @@ class CompatibleProcessor(ChatProcessor): rd = {} try: renderer = self.get_renderer(item) - if renderer == None: + if renderer is None: return None rd["kind"] = "youtube#liveChatMessage" diff --git a/pytchat/processors/compatible/renderer/base.py b/pytchat/processors/compatible/renderer/base.py index d9003f9..248a93a 100644 --- a/pytchat/processors/compatible/renderer/base.py +++ b/pytchat/processors/compatible/renderer/base.py @@ -1,68 +1,67 @@ -import datetime, pytz +import datetime +import pytz + class BaseRenderer: def __init__(self, item, chattype): self.renderer = list(item.values())[0] self.chattype = chattype - def get_snippet(self): message = self.get_message(self.renderer) return { - "type" : self.chattype, - "liveChatId" : "", - "authorChannelId" : self.renderer.get("authorExternalChannelId"), - "publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)), - "hasDisplayContent" : True, - "displayMessage" : message, + "type": self.chattype, + "liveChatId": "", + "authorChannelId": self.renderer.get("authorExternalChannelId"), + "publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)), + "hasDisplayContent": True, + "displayMessage": message, "textMessageDetails": { - "messageText" : message + "messageText": message } } - def get_authordetails(self): authorExternalChannelId = self.renderer.get("authorExternalChannelId") - #parse subscriber type + # parse subscriber type isVerified, isChatOwner, isChatSponsor, isChatModerator = ( self.get_badges(self.renderer) ) - return { - "channelId" : authorExternalChannelId, - "channelUrl" : "http://www.youtube.com/channel/"+authorExternalChannelId, - "displayName" : self.renderer["authorName"]["simpleText"], - "profileImageUrl" : self.renderer["authorPhoto"]["thumbnails"][1]["url"] , - "isVerified" : isVerified, - "isChatOwner" : isChatOwner, - "isChatSponsor" : isChatSponsor, - "isChatModerator" : isChatModerator - } + return { + "channelId": authorExternalChannelId, + "channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId, + "displayName": self.renderer["authorName"]["simpleText"], + "profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"], + "isVerified": isVerified, + "isChatOwner": isChatOwner, + "isChatSponsor": isChatSponsor, + "isChatModerator": isChatModerator + } - - def get_message(self,renderer): + def get_message(self, renderer): message = '' if renderer.get("message"): - runs=renderer["message"].get("runs") + runs = renderer["message"].get("runs") if runs: for r in runs: if r: if r.get('emoji'): - message += r['emoji'].get('shortcuts',[''])[0] + message += r['emoji'].get('shortcuts', [''])[0] else: - message += r.get('text','') + message += r.get('text', '') return message - def get_badges(self,renderer): + def get_badges(self, renderer): isVerified = False isChatOwner = False isChatSponsor = False isChatModerator = False - badges=renderer.get("authorBadges") + badges = renderer.get("authorBadges") if badges: for badge in badges: - author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"] + author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"] if author_type == '確認済み': isVerified = True if author_type == '所有者': @@ -72,12 +71,11 @@ class BaseRenderer: if author_type == 'モデレーター': isChatModerator = True return isVerified, isChatOwner, isChatSponsor, isChatModerator - + def get_id(self): return self.renderer.get('id') - - def get_publishedat(self,timestamp): - dt = datetime.datetime.fromtimestamp(int(timestamp)/1000000) + + def get_publishedat(self, timestamp): + dt = datetime.datetime.fromtimestamp(int(timestamp) / 1000000) return dt.astimezone(pytz.utc).isoformat( - timespec='milliseconds').replace('+00:00','Z') - \ No newline at end of file + timespec='milliseconds').replace('+00:00', 'Z') diff --git a/pytchat/processors/compatible/renderer/currency.py b/pytchat/processors/compatible/renderer/currency.py index 0ec60a8..00d683c 100644 --- a/pytchat/processors/compatible/renderer/currency.py +++ b/pytchat/processors/compatible/renderer/currency.py @@ -35,4 +35,4 @@ symbols = { "NOK\xa0": {"fxtext": "NOK", "jptext": "ノルウェー・クローネ"}, "BAM\xa0": {"fxtext": "BAM", "jptext": "ボスニア・兌換マルカ"}, "SGD\xa0": {"fxtext": "SGD", "jptext": "シンガポール・ドル"} -} \ No newline at end of file +} diff --git a/pytchat/processors/compatible/renderer/legacypaid.py b/pytchat/processors/compatible/renderer/legacypaid.py index 1b31631..b406c2c 100644 --- a/pytchat/processors/compatible/renderer/legacypaid.py +++ b/pytchat/processors/compatible/renderer/legacypaid.py @@ -1,4 +1,6 @@ from .base import BaseRenderer + + class LiveChatLegacyPaidMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "newSponsorEvent") @@ -8,36 +10,33 @@ class LiveChatLegacyPaidMessageRenderer(BaseRenderer): message = self.get_message(self.renderer) return { - "type" : self.chattype, - "liveChatId" : "", - "authorChannelId" : self.renderer.get("authorExternalChannelId"), - "publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)), - "hasDisplayContent" : True, - "displayMessage" : message, - + "type": self.chattype, + "liveChatId": "", + "authorChannelId": self.renderer.get("authorExternalChannelId"), + "publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)), + "hasDisplayContent": True, + "displayMessage": message, + } def get_authordetails(self): authorExternalChannelId = self.renderer.get("authorExternalChannelId") - #parse subscriber type + # parse subscriber type isVerified, isChatOwner, _, isChatModerator = ( self.get_badges(self.renderer) ) - return { - "channelId" : authorExternalChannelId, - "channelUrl" : "http://www.youtube.com/channel/"+authorExternalChannelId, - "displayName" : self.renderer["authorName"]["simpleText"], - "profileImageUrl" : self.renderer["authorPhoto"]["thumbnails"][1]["url"] , - "isVerified" : isVerified, - "isChatOwner" : isChatOwner, - "isChatSponsor" : True, - "isChatModerator" : isChatModerator - } + return { + "channelId": authorExternalChannelId, + "channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId, + "displayName": self.renderer["authorName"]["simpleText"], + "profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"], + "isVerified": isVerified, + "isChatOwner": isChatOwner, + "isChatSponsor": True, + "isChatModerator": isChatModerator + } - - def get_message(self,renderer): + def get_message(self, renderer): message = (renderer["eventText"]["runs"][0]["text"] - )+' / '+(renderer["detailText"]["simpleText"]) + ) + ' / ' + (renderer["detailText"]["simpleText"]) return message - - diff --git a/pytchat/processors/compatible/renderer/membership.py b/pytchat/processors/compatible/renderer/membership.py index 5721549..ced2d06 100644 --- a/pytchat/processors/compatible/renderer/membership.py +++ b/pytchat/processors/compatible/renderer/membership.py @@ -25,7 +25,7 @@ class LiveChatMembershipItemRenderer(BaseRenderer): ) return { "channelId": authorExternalChannelId, - "channelUrl": "http://www.youtube.com/channel/"+authorExternalChannelId, + "channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId, "displayName": self.renderer["authorName"]["simpleText"], "profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"], "isVerified": isVerified, @@ -35,6 +35,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer): } def get_message(self, renderer): - message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]]) + message = ''.join([mes.get("text", "") + for mes in renderer["headerSubtext"]["runs"]]) return message, [message] - diff --git a/pytchat/processors/compatible/renderer/paidmessage.py b/pytchat/processors/compatible/renderer/paidmessage.py index d5c2615..c47e75a 100644 --- a/pytchat/processors/compatible/renderer/paidmessage.py +++ b/pytchat/processors/compatible/renderer/paidmessage.py @@ -3,6 +3,7 @@ from . import currency from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") + class LiveChatPaidMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "superChatEvent") @@ -10,32 +11,32 @@ class LiveChatPaidMessageRenderer(BaseRenderer): def get_snippet(self): authorName = self.renderer["authorName"]["simpleText"] message = self.get_message(self.renderer) - amountDisplayString, symbol, amountMicros =( + amountDisplayString, symbol, amountMicros = ( self.get_amountdata(self.renderer) ) return { - "type" : self.chattype, - "liveChatId" : "", - "authorChannelId" : self.renderer.get("authorExternalChannelId"), - "publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)), - "hasDisplayContent" : True, - "displayMessage" : amountDisplayString+" from "+authorName+': \"'+ message+'\"', - "superChatDetails" : { - "amountMicros" : amountMicros, - "currency" : currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol, - "amountDisplayString" : amountDisplayString, - "tier" : 0, - "backgroundColor" : self.renderer.get("bodyBackgroundColor", 0) + "type": self.chattype, + "liveChatId": "", + "authorChannelId": self.renderer.get("authorExternalChannelId"), + "publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)), + "hasDisplayContent": True, + "displayMessage": amountDisplayString + " from " + authorName + ': \"' + message + '\"', + "superChatDetails": { + "amountMicros": amountMicros, + "currency": currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol, + "amountDisplayString": amountDisplayString, + "tier": 0, + "backgroundColor": self.renderer.get("bodyBackgroundColor", 0) } } - def get_amountdata(self,renderer): + def get_amountdata(self, renderer): amountDisplayString = renderer["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) - amountMicros = int(float(m.group(2).replace(',',''))*1000000) + amountMicros = int(float(m.group(2).replace(',', '')) * 1000000) else: symbol = "" amountMicros = 0 - return amountDisplayString, symbol, amountMicros \ No newline at end of file + return amountDisplayString, symbol, amountMicros diff --git a/pytchat/processors/compatible/renderer/paidsticker.py b/pytchat/processors/compatible/renderer/paidsticker.py index 20abf32..e7cc87d 100644 --- a/pytchat/processors/compatible/renderer/paidsticker.py +++ b/pytchat/processors/compatible/renderer/paidsticker.py @@ -3,46 +3,45 @@ from . import currency from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") + class LiveChatPaidStickerRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "superStickerEvent") def get_snippet(self): authorName = self.renderer["authorName"]["simpleText"] - amountDisplayString, symbol, amountMicros =( + amountDisplayString, symbol, amountMicros = ( self.get_amountdata(self.renderer) ) return { - "type" : self.chattype, - "liveChatId" : "", - "authorChannelId" : self.renderer.get("authorExternalChannelId"), - "publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)), - "hasDisplayContent" : True, - "displayMessage" : "Super Sticker " + amountDisplayString + " from "+authorName, - "superStickerDetails" : { - "superStickerMetaData" : { + "type": self.chattype, + "liveChatId": "", + "authorChannelId": self.renderer.get("authorExternalChannelId"), + "publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)), + "hasDisplayContent": True, + "displayMessage": "Super Sticker " + amountDisplayString + " from " + authorName, + "superStickerDetails": { + "superStickerMetaData": { "stickerId": "", "altText": "", - "language": "" + "language": "" }, - "amountMicros" : amountMicros, - "currency" : currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol, - "amountDisplayString" : amountDisplayString, - "tier" : 0, - "backgroundColor" : self.renderer.get("bodyBackgroundColor", 0) + "amountMicros": amountMicros, + "currency": currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol, + "amountDisplayString": amountDisplayString, + "tier": 0, + "backgroundColor": self.renderer.get("bodyBackgroundColor", 0) } } - def get_amountdata(self,renderer): + def get_amountdata(self, renderer): amountDisplayString = renderer["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) - amountMicros = int(float(m.group(2).replace(',',''))*1000000) + amountMicros = int(float(m.group(2).replace(',', '')) * 1000000) else: symbol = "" amountMicros = 0 return amountDisplayString, symbol, amountMicros - - diff --git a/pytchat/processors/compatible/renderer/textmessage.py b/pytchat/processors/compatible/renderer/textmessage.py index dae62f1..c40aca2 100644 --- a/pytchat/processors/compatible/renderer/textmessage.py +++ b/pytchat/processors/compatible/renderer/textmessage.py @@ -1,4 +1,6 @@ from .base import BaseRenderer + + class LiveChatTextMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "textMessageEvent") diff --git a/pytchat/processors/default/processor.py b/pytchat/processors/default/processor.py index 3ba14c0..c4f8f47 100644 --- a/pytchat/processors/default/processor.py +++ b/pytchat/processors/default/processor.py @@ -20,13 +20,13 @@ class Chatdata: if self.interval == 0: time.sleep(1) return - time.sleep(self.interval/len(self.items)) + time.sleep(self.interval / len(self.items)) async def tick_async(self): if self.interval == 0: await asyncio.sleep(1) return - await asyncio.sleep(self.interval/len(self.items)) + await asyncio.sleep(self.interval / len(self.items)) class DefaultProcessor(ChatProcessor): @@ -62,7 +62,7 @@ class DefaultProcessor(ChatProcessor): return None try: renderer = self._get_renderer(item) - if renderer == None: + if renderer is None: return None renderer.get_snippet() diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py index 4a1aa41..1e42619 100644 --- a/pytchat/processors/default/renderer/base.py +++ b/pytchat/processors/default/renderer/base.py @@ -1,6 +1,10 @@ from datetime import datetime + + class Author: pass + + class BaseRenderer: def __init__(self, item, chattype): self.renderer = list(item.values())[0] @@ -10,65 +14,62 @@ class BaseRenderer: def get_snippet(self): self.type = self.chattype self.id = self.renderer.get('id') - timestampUsec = int(self.renderer.get("timestampUsec",0)) - self.timestamp = int(timestampUsec/1000) + timestampUsec = int(self.renderer.get("timestampUsec", 0)) + self.timestamp = int(timestampUsec / 1000) tst = self.renderer.get("timestampText") if tst: self.elapsedTime = tst.get("simpleText") else: self.elapsedTime = "" self.datetime = self.get_datetime(timestampUsec) - self.message ,self.messageEx = self.get_message(self.renderer) - self.id = self.renderer.get('id') - self.amountValue= 0.0 + self.message, self.messageEx = self.get_message(self.renderer) + self.id = self.renderer.get('id') + self.amountValue = 0.0 self.amountString = "" - self.currency= "" + self.currency = "" self.bgColor = 0 def get_authordetails(self): self.author.badgeUrl = "" - (self.author.isVerified, - self.author.isChatOwner, - self.author.isChatSponsor, - self.author.isChatModerator) = ( + (self.author.isVerified, + self.author.isChatOwner, + self.author.isChatSponsor, + self.author.isChatModerator) = ( self.get_badges(self.renderer) ) self.author.channelId = self.renderer.get("authorExternalChannelId") - self.author.channelUrl = "http://www.youtube.com/channel/"+self.author.channelId - self.author.name = self.renderer["authorName"]["simpleText"] - self.author.imageUrl= self.renderer["authorPhoto"]["thumbnails"][1]["url"] - + self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId + self.author.name = self.renderer["authorName"]["simpleText"] + self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"] - - def get_message(self,renderer): + def get_message(self, renderer): message = '' message_ex = [] if renderer.get("message"): - runs=renderer["message"].get("runs") + runs = renderer["message"].get("runs") if runs: for r in runs: if r: if r.get('emoji'): - message += r['emoji'].get('shortcuts',[''])[0] - message_ex.append(r['emoji']['image']['thumbnails'][1].get('url')) + message += r['emoji'].get('shortcuts', [''])[0] + message_ex.append( + r['emoji']['image']['thumbnails'][1].get('url')) else: - message += r.get('text','') - message_ex.append(r.get('text','')) + message += r.get('text', '') + message_ex.append(r.get('text', '')) return message, message_ex - - - def get_badges(self,renderer): + def get_badges(self, renderer): self.author.type = '' isVerified = False isChatOwner = False isChatSponsor = False isChatModerator = False - badges=renderer.get("authorBadges") + badges = renderer.get("authorBadges") if badges: for badge in badges: if badge["liveChatAuthorBadgeRenderer"].get("icon"): - author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] + author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] self.author.type = author_type if author_type == 'VERIFIED': isVerified = True @@ -81,13 +82,10 @@ class BaseRenderer: self.author.type = 'MEMBER' self.get_badgeurl(badge) return isVerified, isChatOwner, isChatSponsor, isChatModerator - - def get_badgeurl(self,badge): + def get_badgeurl(self, badge): self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] - - - def get_datetime(self,timestamp): - dt = datetime.fromtimestamp(timestamp/1000000) - return dt.strftime('%Y-%m-%d %H:%M:%S') \ No newline at end of file + def get_datetime(self, timestamp): + dt = datetime.fromtimestamp(timestamp / 1000000) + return dt.strftime('%Y-%m-%d %H:%M:%S') diff --git a/pytchat/processors/default/renderer/currency.py b/pytchat/processors/default/renderer/currency.py index 37f353e..4d4c314 100644 --- a/pytchat/processors/default/renderer/currency.py +++ b/pytchat/processors/default/renderer/currency.py @@ -35,4 +35,4 @@ symbols = { "NOK\xa0": {"fxtext": "NOK", "jptext": "ノルウェー・クローネ"}, "BAM\xa0": {"fxtext": "BAM", "jptext": "ボスニア・兌換マルカ"}, "SGD\xa0": {"fxtext": "SGD", "jptext": "シンガポール・ドル"} -} \ No newline at end of file +} diff --git a/pytchat/processors/default/renderer/legacypaid.py b/pytchat/processors/default/renderer/legacypaid.py index 12dfde5..ee238cf 100644 --- a/pytchat/processors/default/renderer/legacypaid.py +++ b/pytchat/processors/default/renderer/legacypaid.py @@ -1,18 +1,15 @@ from .base import BaseRenderer + + class LiveChatLegacyPaidMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "newSponsor") - - def get_authordetails(self): super().get_authordetails() - self.author.isChatSponsor = True + self.author.isChatSponsor = True - - def get_message(self,renderer): + def get_message(self, renderer): message = (renderer["eventText"]["runs"][0]["text"] - )+' / '+(renderer["detailText"]["simpleText"]) + ) + ' / ' + (renderer["detailText"]["simpleText"]) return message - - diff --git a/pytchat/processors/default/renderer/membership.py b/pytchat/processors/default/renderer/membership.py index 726b617..7a7d100 100644 --- a/pytchat/processors/default/renderer/membership.py +++ b/pytchat/processors/default/renderer/membership.py @@ -10,6 +10,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer): self.author.isChatSponsor = True def get_message(self, renderer): - message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]]) + message = ''.join([mes.get("text", "") + for mes in renderer["headerSubtext"]["runs"]]) return message, [message] - diff --git a/pytchat/processors/default/renderer/paidmessage.py b/pytchat/processors/default/renderer/paidmessage.py index c215552..9e69ab4 100644 --- a/pytchat/processors/default/renderer/paidmessage.py +++ b/pytchat/processors/default/renderer/paidmessage.py @@ -3,30 +3,29 @@ from . import currency from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") + class LiveChatPaidMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "superChat") - def get_snippet(self): super().get_snippet() - amountDisplayString, symbol, amount =( + amountDisplayString, symbol, amount = ( self.get_amountdata(self.renderer) ) - self.amountValue= amount + self.amountValue = amount self.amountString = amountDisplayString - self.currency= currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol - self.bgColor= self.renderer.get("bodyBackgroundColor", 0) - + self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + symbol) else symbol + self.bgColor = self.renderer.get("bodyBackgroundColor", 0) - - def get_amountdata(self,renderer): + def get_amountdata(self, renderer): amountDisplayString = renderer["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) - amount = float(m.group(2).replace(',','')) + amount = float(m.group(2).replace(',', '')) else: symbol = "" amount = 0.0 - return amountDisplayString, symbol, amount \ No newline at end of file + return amountDisplayString, symbol, amount diff --git a/pytchat/processors/default/renderer/paidsticker.py b/pytchat/processors/default/renderer/paidsticker.py index 8ec4828..b474e71 100644 --- a/pytchat/processors/default/renderer/paidsticker.py +++ b/pytchat/processors/default/renderer/paidsticker.py @@ -3,37 +3,31 @@ from . import currency from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") + class LiveChatPaidStickerRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "superSticker") - def get_snippet(self): super().get_snippet() - amountDisplayString, symbol, amount =( + amountDisplayString, symbol, amount = ( self.get_amountdata(self.renderer) ) self.amountValue = amount self.amountString = amountDisplayString - self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol + self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + symbol) else symbol self.bgColor = self.renderer.get("moneyChipBackgroundColor", 0) - self.sticker = "https:"+self.renderer["sticker"]["thumbnails"][0]["url"] - + self.sticker = "https:" + \ + self.renderer["sticker"]["thumbnails"][0]["url"] - - def get_amountdata(self,renderer): + def get_amountdata(self, renderer): amountDisplayString = renderer["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) - amount = float(m.group(2).replace(',','')) + amount = float(m.group(2).replace(',', '')) else: symbol = "" amount = 0.0 return amountDisplayString, symbol, amount - - - - - - diff --git a/pytchat/processors/default/renderer/textmessage.py b/pytchat/processors/default/renderer/textmessage.py index 131f8b3..475a70d 100644 --- a/pytchat/processors/default/renderer/textmessage.py +++ b/pytchat/processors/default/renderer/textmessage.py @@ -1,4 +1,6 @@ from .base import BaseRenderer + + class LiveChatTextMessageRenderer(BaseRenderer): def __init__(self, item): super().__init__(item, "textMessage") diff --git a/pytchat/processors/dummy_processor.py b/pytchat/processors/dummy_processor.py index e2e406d..da02573 100644 --- a/pytchat/processors/dummy_processor.py +++ b/pytchat/processors/dummy_processor.py @@ -1,8 +1,10 @@ from .chat_processor import ChatProcessor + class DummyProcessor(ChatProcessor): ''' Dummy processor just returns received chat_components directly. ''' + def process(self, chat_components: list): return chat_components diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 9249cf4..397d31e 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -1,18 +1,18 @@ -import csv import os import re from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor -PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") -fmt_headers = ['datetime','elapsed','authorName','message','superchat' - ,'type','authorChannel'] +PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") +fmt_headers = ['datetime', 'elapsed', 'authorName', + 'message', 'superchat', 'type', 'authorChannel'] HEADER_HTML = ''' ''' + class HTMLArchiver(ChatProcessor): ''' HtmlArchiver saves chat data as HTML table format. @@ -21,7 +21,7 @@ class HTMLArchiver(ChatProcessor): def __init__(self, save_path): super().__init__() self.save_path = self._checkpath(save_path) - with open(self.save_path, mode='a', encoding = 'utf-8') as f: + with open(self.save_path, mode='a', encoding='utf-8') as f: f.write(HEADER_HTML) f.write('') f.writelines(self._parse_html_header(fmt_headers)) @@ -34,30 +34,30 @@ class HTMLArchiver(ChatProcessor): newpath = filepath counter = 0 while os.path.exists(newpath): - match = re.search(PATTERN,body) + match = re.search(PATTERN, body) if match: - counter=int(match[2])+1 + counter = int(match[2]) + 1 num_with_bracket = f'({str(counter)})' body = f'{match[1]}{num_with_bracket}' else: body = f'{body}({str(counter)})' - newpath = os.path.join(os.path.dirname(filepath),body+extention) + newpath = os.path.join(os.path.dirname(filepath), body + extention) return newpath def process(self, chat_components: list): """ Returns ---------- - dict : + dict : save_path : str : Actual save path of file. total_lines : int : count of total lines written to the file. """ - if chat_components is None or len (chat_components) == 0: + if chat_components is None or len(chat_components) == 0: return - with open(self.save_path, mode='a', encoding = 'utf-8') as f: + with open(self.save_path, mode='a', encoding='utf-8') as f: chats = self.processor.process(chat_components).items for c in chats: f.writelines( @@ -76,23 +76,22 @@ class HTMLArchiver(ChatProcessor): Comment out below line to prevent the table display from collapsing. ''' - #f.write('
') + # f.write('') def _parse_html_line(self, raw_line): html = '' - html+=' ' + html += ' ' for cell in raw_line: - html+=''+cell+'' - html+='\n' + html += '' + cell + '' + html += '\n' return html - - def _parse_html_header(self,raw_line): + + def _parse_html_header(self, raw_line): html = '' - html+='\n' - html+=' ' + html += '\n' + html += ' ' for cell in raw_line: - html+=''+cell+'' - html+='\n' - html+='\n' + html += '' + cell + '' + html += '\n' + html += '\n' return html - \ No newline at end of file diff --git a/pytchat/processors/jsonfile_archiver.py b/pytchat/processors/jsonfile_archiver.py index f533564..9ca6b39 100644 --- a/pytchat/processors/jsonfile_archiver.py +++ b/pytchat/processors/jsonfile_archiver.py @@ -1,10 +1,10 @@ -import datetime import json import os import re from .chat_processor import ChatProcessor -PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") +PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") + class JsonfileArchiver(ChatProcessor): """ @@ -13,39 +13,44 @@ class JsonfileArchiver(ChatProcessor): Parameter: ---------- save_path : str : - save path of file.If a file with the same name exists, + save path of file.If a file with the same name exists, it is automatically saved under a different name with suffix '(number)' """ - def __init__(self,save_path): + + def __init__(self, save_path): super().__init__() self.save_path = self._checkpath(save_path) self.line_counter = 0 - - def process(self,chat_components: list): + + def process(self, chat_components: list): """ Returns ---------- - dict : + dict : save_path : str : Actual save path of file. total_lines : int : count of total lines written to the file. """ - if chat_components is None: return - with open(self.save_path, mode='a', encoding = 'utf-8') as f: + if chat_components is None: + return + with open(self.save_path, mode='a', encoding='utf-8') as f: for component in chat_components: - if component is None: continue + if component is None: + continue chatdata = component.get('chatdata') - if chatdata is None: continue + if chatdata is None: + continue for action in chatdata: - if action is None: continue - json_line = json.dumps(action, ensure_ascii = False) - f.writelines(json_line+'\n') - self.line_counter+=1 - return { "save_path" : self.save_path, - "total_lines": self.line_counter } - + if action is None: + continue + json_line = json.dumps(action, ensure_ascii=False) + f.writelines(json_line + '\n') + self.line_counter += 1 + return {"save_path": self.save_path, + "total_lines": self.line_counter} + def _checkpath(self, filepath): splitter = os.path.splitext(os.path.basename(filepath)) body = splitter[0] @@ -53,14 +58,12 @@ class JsonfileArchiver(ChatProcessor): newpath = filepath counter = 0 while os.path.exists(newpath): - match = re.search(PATTERN,body) + match = re.search(PATTERN, body) if match: - counter=int(match[2])+1 + counter = int(match[2]) + 1 num_with_bracket = f'({str(counter)})' body = f'{match[1]}{num_with_bracket}' else: body = f'{body}({str(counter)})' - newpath = os.path.join(os.path.dirname(filepath),body+extention) + newpath = os.path.join(os.path.dirname(filepath), body + extention) return newpath - - diff --git a/pytchat/processors/simple_display_processor.py b/pytchat/processors/simple_display_processor.py index 1ca01ce..ba9472c 100644 --- a/pytchat/processors/simple_display_processor.py +++ b/pytchat/processors/simple_display_processor.py @@ -1,47 +1,49 @@ -import json -import os -import traceback -import datetime -import time from .chat_processor import ChatProcessor -##version 2 + + class SimpleDisplayProcessor(ChatProcessor): - + def process(self, chat_components: list): chatlist = [] timeout = 0 - + if chat_components is None: - return {"timeout":timeout, "chatlist":chatlist} + return {"timeout": timeout, "chatlist": chatlist} for component in chat_components: timeout += component.get('timeout', 0) chatdata = component.get('chatdata') - - if chatdata is None:break - for action in chatdata: - if action is None:continue - if action.get('addChatItemAction') is None:continue - if action['addChatItemAction'].get('item') is None:continue - root = action['addChatItemAction']['item'].get('liveChatTextMessageRenderer') - + if chatdata is None: + break + for action in chatdata: + if action is None: + continue + if action.get('addChatItemAction') is None: + continue + if action['addChatItemAction'].get('item') is None: + continue + + root = action['addChatItemAction']['item'].get( + 'liveChatTextMessageRenderer') + if root: author_name = root['authorName']['simpleText'] message = self._parse_message(root.get('message')) purchase_amount_text = '' else: - root = ( action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer') or - action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer') ) + root = (action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer') + or action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer')) if root: author_name = root['authorName']['simpleText'] message = self._parse_message(root.get('message')) purchase_amount_text = root['purchaseAmountText']['simpleText'] else: continue - chatlist.append(f'[{author_name}]: {message} {purchase_amount_text}') - return {"timeout":timeout, "chatlist":chatlist} - - def _parse_message(self,message): + chatlist.append( + f'[{author_name}]: {message} {purchase_amount_text}') + return {"timeout": timeout, "chatlist": chatlist} + + def _parse_message(self, message): if message is None: return '' if message.get('simpleText'): @@ -51,11 +53,9 @@ class SimpleDisplayProcessor(ChatProcessor): tmp = '' for run in runs: if run.get('emoji'): - tmp+=(run['emoji']['shortcuts'][0]) + tmp += (run['emoji']['shortcuts'][0]) elif run.get('text'): - tmp+=(run['text']) + tmp += (run['text']) return tmp else: return '' - - diff --git a/pytchat/processors/speed/calculator.py b/pytchat/processors/speed/calculator.py index d91c279..52d57df 100644 --- a/pytchat/processors/speed/calculator.py +++ b/pytchat/processors/speed/calculator.py @@ -5,10 +5,12 @@ Calculate speed of chat. """ import time from .. chat_processor import ChatProcessor + + class RingQueue: """ リング型キュー - + Attributes ---------- items : list @@ -21,10 +23,10 @@ class RingQueue: キュー内に余裕があるか。キュー内のアイテム個数が、キューの最大個数未満であればTrue。 """ - def __init__(self, capacity): + def __init__(self, capacity): """ コンストラクタ - + Parameter ---------- capacity:このキューに格納するアイテムの最大個数。 @@ -50,17 +52,17 @@ class RingQueue: """ if self.mergin: self.items.append(item) - self.last_pos = len(self.items)-1 - if self.last_pos == self.capacity-1: + self.last_pos = len(self.items) - 1 + if self.last_pos == self.capacity - 1: self.mergin = False return self.last_pos += 1 - if self.last_pos > self.capacity-1: + if self.last_pos > self.capacity - 1: self.last_pos = 0 self.items[self.last_pos] = item - + self.first_pos += 1 - if self.first_pos > self.capacity-1: + if self.first_pos > self.capacity - 1: self.first_pos = 0 def get(self): @@ -76,11 +78,12 @@ class RingQueue: def item_count(self): return len(self.items) - + + class SpeedCalculator(ChatProcessor, RingQueue): """ チャットの勢いを計算する。 - + 一定期間のチャットデータのうち、最初のチャットの投稿時刻と 最後のチャットの投稿時刻の差を、チャット数で割り返し 1分あたりの速度に換算する。 @@ -91,7 +94,7 @@ class SpeedCalculator(ChatProcessor, RingQueue): RingQueueに格納するチャット勢い算出用データの最大数 """ - def __init__(self, capacity = 10): + def __init__(self, capacity=10): super().__init__(capacity) self.speed = 0 @@ -105,7 +108,6 @@ class SpeedCalculator(ChatProcessor, RingQueue): self._put_chatdata(chatdata) self.speed = self._calc_speed() return self.speed - def _calc_speed(self): """ @@ -116,14 +118,13 @@ class SpeedCalculator(ChatProcessor, RingQueue): --------------------------- チャット速度(1分間で換算したチャット数) """ - try: - #キュー内の総チャット数 + try: + # キュー内の総チャット数 total = sum(item['chat_count'] for item in self.items) - #キュー内の最初と最後のチャットの時間差 - duration = (self.items[self.last_pos]['endtime'] - - self.items[self.first_pos]['starttime']) + # キュー内の最初と最後のチャットの時間差 + duration = (self.items[self.last_pos]['endtime'] - self.items[self.first_pos]['starttime']) if duration != 0: - return int(total*60/duration) + return int(total * 60 / duration) return 0 except IndexError: return 0 @@ -143,61 +144,60 @@ class SpeedCalculator(ChatProcessor, RingQueue): ''' チャットデータがない場合に空のデータをキューに投入する。 ''' - timestamp_now = int(time.time()) + timestamp_now = int(time.time()) self.put({ - 'chat_count':0, - 'starttime':int(timestamp_now), - 'endtime':int(timestamp_now) + 'chat_count': 0, + 'starttime': int(timestamp_now), + 'endtime': int(timestamp_now) }) - def _get_timestamp(action :dict): + def _get_timestamp(action: dict): """ チャットデータから時刻データを取り出す。 """ try: item = action['addChatItemAction']['item'] timestamp = int(item[list(item.keys())[0]]['timestampUsec']) - except (KeyError,TypeError): + except (KeyError, TypeError): return None return timestamp - if actions is None or len(actions)==0: + if actions is None or len(actions) == 0: _put_emptydata() - return - - #actions内の時刻データを持つチャットデータの数 - counter=0 - #actions内の最初のチャットデータの時刻 - starttime= None - #actions内の最後のチャットデータの時刻 - endtime=None - + return + + # actions内の時刻データを持つチャットデータの数 + counter = 0 + # actions内の最初のチャットデータの時刻 + starttime = None + # actions内の最後のチャットデータの時刻 + endtime = None + for action in actions: - #チャットデータからtimestampUsecを読み取る + # チャットデータからtimestampUsecを読み取る gettime = _get_timestamp(action) - - #時刻のないデータだった場合は次の行のデータで読み取り試行 + + # 時刻のないデータだった場合は次の行のデータで読み取り試行 if gettime is None: continue - - #最初に有効な時刻を持つデータのtimestampをstarttimeに設定 + + # 最初に有効な時刻を持つデータのtimestampをstarttimeに設定 if starttime is None: starttime = gettime - - #最後のtimestampを設定(途中で時刻のないデータの場合もあるので上書きしていく) + + # 最後のtimestampを設定(途中で時刻のないデータの場合もあるので上書きしていく) endtime = gettime - - #チャットの数をインクリメント + + # チャットの数をインクリメント counter += 1 - #チャット速度用のデータをRingQueueに送る + # チャット速度用のデータをRingQueueに送る if starttime is None or endtime is None: _put_emptydata() - return - - self.put({ - 'chat_count':counter, - 'starttime':int(starttime/1000000), - 'endtime':int(endtime/1000000) - }) + return + self.put({ + 'chat_count': counter, + 'starttime': int(starttime / 1000000), + 'endtime': int(endtime / 1000000) + }) diff --git a/pytchat/processors/superchat/calculator.py b/pytchat/processors/superchat/calculator.py index f62452f..fd60dc7 100644 --- a/pytchat/processors/superchat/calculator.py +++ b/pytchat/processors/superchat/calculator.py @@ -15,10 +15,12 @@ items_sticker = [ 'liveChatPaidStickerRenderer' ] + class SuperchatCalculator(ChatProcessor): """ Calculate the amount of SuperChat by currency. """ + def __init__(self): self.results = {} @@ -34,22 +36,24 @@ class SuperchatCalculator(ChatProcessor): return self.results for component in chat_components: chatdata = component.get('chatdata') - if chatdata is None: continue + if chatdata is None: + continue for action in chatdata: renderer = self._get_item(action, items_paid) or \ - self._get_item(action, items_sticker) - if renderer is None: continue + self._get_item(action, items_sticker) + if renderer is None: + continue symbol, amount = self._parse(renderer) - self.results.setdefault(symbol,0) - self.results[symbol]+=amount + self.results.setdefault(symbol, 0) + self.results[symbol] += amount return self.results - + def _parse(self, renderer): purchase_amount_text = renderer["purchaseAmountText"]["simpleText"] m = superchat_regex.search(purchase_amount_text) if m: symbol = m.group(1) - amount = float(m.group(2).replace(',','')) + amount = float(m.group(2).replace(',', '')) else: symbol = "" amount = 0.0 @@ -69,6 +73,3 @@ class SuperchatCalculator(ChatProcessor): continue return None return dict_body - - - diff --git a/pytchat/processors/tsv_archiver.py b/pytchat/processors/tsv_archiver.py index 170564c..8a4be45 100644 --- a/pytchat/processors/tsv_archiver.py +++ b/pytchat/processors/tsv_archiver.py @@ -4,9 +4,10 @@ import re from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor -PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") -fmt_headers = ['datetime','elapsed','authorName','message','superchatAmount' - ,'authorType','authorChannel'] +PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") +fmt_headers = ['datetime', 'elapsed', 'authorName', 'message', + 'superchatAmount', 'authorType', 'authorChannel'] + class TSVArchiver(ChatProcessor): ''' @@ -16,7 +17,7 @@ class TSVArchiver(ChatProcessor): def __init__(self, save_path): super().__init__() self.save_path = self._checkpath(save_path) - with open(self.save_path, mode='a', encoding = 'utf-8') as f: + with open(self.save_path, mode='a', encoding='utf-8') as f: writer = csv.writer(f, delimiter='\t') writer.writerow(fmt_headers) self.processor = DefaultProcessor() @@ -28,30 +29,30 @@ class TSVArchiver(ChatProcessor): newpath = filepath counter = 0 while os.path.exists(newpath): - match = re.search(PATTERN,body) + match = re.search(PATTERN, body) if match: - counter=int(match[2])+1 + counter = int(match[2]) + 1 num_with_bracket = f'({str(counter)})' body = f'{match[1]}{num_with_bracket}' else: body = f'{body}({str(counter)})' - newpath = os.path.join(os.path.dirname(filepath),body+extention) + newpath = os.path.join(os.path.dirname(filepath), body + extention) return newpath def process(self, chat_components: list): """ Returns ---------- - dict : + dict : save_path : str : Actual save path of file. total_lines : int : count of total lines written to the file. """ - if chat_components is None or len (chat_components) == 0: + if chat_components is None or len(chat_components) == 0: return - with open(self.save_path, mode='a', encoding = 'utf-8') as f: + with open(self.save_path, mode='a', encoding='utf-8') as f: writer = csv.writer(f, delimiter='\t') chats = self.processor.process(chat_components).items for c in chats: @@ -64,7 +65,3 @@ class TSVArchiver(ChatProcessor): c.author.type, c.author.channelId ]) - - - - \ No newline at end of file diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py index 084f037..7169be1 100644 --- a/pytchat/tool/extract/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -5,7 +5,7 @@ from . import parser from . block import Block from . worker import ExtractWorker from . patch import Patch -from ... import config +from ... import config from ... paramgen import arcparam from ... exceptions import UnknownConnectionError from concurrent.futures import CancelledError @@ -17,10 +17,11 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ "get_live_chat_replay?continuation=" MAX_RETRY_COUNT = 3 -def _split(start, end, count, min_interval_sec = 120): + +def _split(start, end, count, min_interval_sec=120): """ Split section from `start` to `end` into `count` pieces, - and returns the beginning of each piece. + and returns the beginning of each piece. The `count` is adjusted so that the length of each piece is no smaller than `min_interval`. @@ -28,41 +29,43 @@ def _split(start, end, count, min_interval_sec = 120): -------- List of the offset of each block's first chat data. """ - - if not (isinstance(start,int) or isinstance(start,float)) or \ - not (isinstance(end,int) or isinstance(end,float)): + if not (isinstance(start, int) or isinstance(start, float)) or \ + not (isinstance(end, int) or isinstance(end, float)): raise ValueError("start/end must be int or float") - if not isinstance(count,int): + if not isinstance(count, int): raise ValueError("count must be int") - if start>end: + if start > end: raise ValueError("end must be equal to or greater than start.") - if count<1: + if count < 1: raise ValueError("count must be equal to or greater than 1.") - if (end-start)/count < min_interval_sec: - count = int((end-start)/min_interval_sec) - if count == 0 : count = 1 - interval= (end-start)/count - + if (end - start) / count < min_interval_sec: + count = int((end - start) / min_interval_sec) + if count == 0: + count = 1 + interval = (end - start) / count + if count == 1: return [start] - return sorted( list(set( [int(start + interval*j) - for j in range(count) ]))) + return sorted(list(set([int(start + interval * j) + for j in range(count)]))) + def ready_blocks(video_id, duration, div, callback): - if div <= 0: raise ValueError + if div <= 0: + raise ValueError - async def _get_blocks( video_id, duration, div, callback): + async def _get_blocks(video_id, duration, div, callback): async with aiohttp.ClientSession() as session: - tasks = [_create_block(session, video_id, seektime, callback) - for seektime in _split(-1, duration, div)] + tasks = [_create_block(session, video_id, seektime, callback) + for seektime in _split(-1, duration, div)] return await asyncio.gather(*tasks) - + async def _create_block(session, video_id, seektime, callback): - continuation = arcparam.getparam(video_id, seektime = seektime) + continuation = arcparam.getparam(video_id, seektime=seektime) url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" for _ in range(MAX_RETRY_COUNT): - try : - async with session.get(url, headers = headers) as resp: + try: + async with session.get(url, headers=headers) as resp: text = await resp.text() next_continuation, actions = parser.parse(json.loads(text)) break @@ -76,41 +79,42 @@ def ready_blocks(video_id, duration, div, callback): first = parser.get_offset(actions[0]) last = parser.get_offset(actions[-1]) if callback: - callback(actions,last-first) + callback(actions, last - first) return Block( - continuation = next_continuation, - chat_data = actions, - first = first, - last = last + continuation=next_continuation, + chat_data=actions, + first=first, + last=last ) - + """ fetch initial blocks. - """ + """ loop = asyncio.get_event_loop() blocks = loop.run_until_complete( _get_blocks(video_id, duration, div, callback)) return blocks + def fetch_patch(callback, blocks, video_id): async def _allocate_workers(): workers = [ ExtractWorker( - fetch = _fetch, block = block, - blocks = blocks, video_id = video_id + fetch=_fetch, block=block, + blocks=blocks, video_id=video_id ) for block in blocks ] async with aiohttp.ClientSession() as session: tasks = [worker.run(session) for worker in workers] - return await asyncio.gather(*tasks) + return await asyncio.gather(*tasks) - async def _fetch(continuation,session) -> Patch: + async def _fetch(continuation, session) -> Patch: url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" for _ in range(MAX_RETRY_COUNT): try: - async with session.get(url,headers = config.headers) as resp: + async with session.get(url, headers=config.headers) as resp: chat_json = await resp.text() continuation, actions = parser.parse(json.loads(chat_json)) break @@ -126,21 +130,22 @@ def fetch_patch(callback, blocks, video_id): if callback: callback(actions, last - first) return Patch(actions, continuation, first, last) - return Patch(continuation = continuation) + return Patch(continuation=continuation) """ allocate workers and assign blocks. - """ + """ loop = asyncio.get_event_loop() try: loop.run_until_complete(_allocate_workers()) except CancelledError: pass + async def _shutdown(): print("\nshutdown...") tasks = [t for t in asyncio.all_tasks() - if t is not asyncio.current_task()] + if t is not asyncio.current_task()] for task in tasks: task.cancel() try: @@ -148,7 +153,7 @@ async def _shutdown(): except asyncio.CancelledError: pass + def cancel(): loop = asyncio.get_event_loop() loop.create_task(_shutdown()) - \ No newline at end of file diff --git a/pytchat/tool/extract/block.py b/pytchat/tool/extract/block.py index cd854e7..c827661 100644 --- a/pytchat/tool/extract/block.py +++ b/pytchat/tool/extract/block.py @@ -1,14 +1,13 @@ -from . import parser class Block: - """Block object represents something like a box + """Block object represents something like a box to join chunk of chatdata. Parameter: --------- first : int : - videoOffsetTimeMs of the first chat_data + videoOffsetTimeMs of the first chat_data (chat_data[0]) - + last : int : videoOffsetTimeMs of the last chat_data. (chat_data[-1]) @@ -23,15 +22,15 @@ class Block: continuation : str : continuation param of last chat data. - chat_data : list + chat_data : list done : bool : whether this block has been fetched. - + remaining : int : remaining data to extract. equals end - last. - + is_last : bool : whether this block is the last one in blocklist. @@ -39,13 +38,13 @@ class Block: whether this block is in the process of during_split. while True, this block is excluded from duplicate split procedure. """ - - __slots__ = ['first','last','end','continuation','chat_data','remaining', - 'done','is_last','during_split'] - def __init__(self, first = 0, last = 0, end = 0, - continuation = '', chat_data = [], is_last = False, - during_split = False): + __slots__ = ['first', 'last', 'end', 'continuation', 'chat_data', 'remaining', + 'done', 'is_last', 'during_split'] + + def __init__(self, first=0, last=0, end=0, + continuation='', chat_data=[], is_last=False, + during_split=False): self.first = first self.last = last self.end = end diff --git a/pytchat/tool/extract/duplcheck.py b/pytchat/tool/extract/duplcheck.py index e94c011..1ac18c1 100644 --- a/pytchat/tool/extract/duplcheck.py +++ b/pytchat/tool/extract/duplcheck.py @@ -1,7 +1,8 @@ from . import parser + def check_duplicate(chatdata): - max_range = len(chatdata)-1 + max_range = len(chatdata) - 1 tbl_offset = [None] * max_range tbl_id = [None] * max_range tbl_type = [None] * max_range @@ -9,33 +10,31 @@ def check_duplicate(chatdata): def create_table(chatdata, max_range): for i in range(max_range): tbl_offset[i] = parser.get_offset(chatdata[i]) - tbl_id[i] = parser.get_id(chatdata[i]) + tbl_id[i] = parser.get_id(chatdata[i]) tbl_type[i] = parser.get_type(chatdata[i]) def is_duplicate(i, j): - return ( + return ( tbl_offset[i] == tbl_offset[j] - and - tbl_id[i] == tbl_id[j] - and - tbl_type[i] == tbl_type[j] + and tbl_id[i] == tbl_id[j] + and tbl_type[i] == tbl_type[j] ) print("creating table...") - create_table(chatdata,max_range) + create_table(chatdata, max_range) print("searching duplicate data...") - return [{ "i":{ - "index" : i, "id" : parser.get_id(chatdata[i]), - "offsetTime" : parser.get_offset(chatdata[i]), - "type" : parser.get_type(chatdata[i]) - }, - "j":{ - "index" : j, "id" : parser.get_id(chatdata[j]), - "offsetTime" : parser.get_offset(chatdata[j]), - "type" : parser.get_type(chatdata[j]) - } - } - for i in range(max_range) for j in range(i+1,max_range) - if is_duplicate(i,j)] + return [{"i": { + "index": i, "id": parser.get_id(chatdata[i]), + "offsetTime": parser.get_offset(chatdata[i]), + "type": parser.get_type(chatdata[i]) + }, + "j":{ + "index": j, "id": parser.get_id(chatdata[j]), + "offsetTime": parser.get_offset(chatdata[j]), + "type": parser.get_type(chatdata[j]) + } + } + for i in range(max_range) for j in range(i + 1, max_range) + if is_duplicate(i, j)] def check_duplicate_offset(chatdata): @@ -47,27 +46,27 @@ def check_duplicate_offset(chatdata): def create_table(chatdata, max_range): for i in range(max_range): tbl_offset[i] = parser.get_offset(chatdata[i]) - tbl_id[i] = parser.get_id(chatdata[i]) + tbl_id[i] = parser.get_id(chatdata[i]) tbl_type[i] = parser.get_type(chatdata[i]) def is_duplicate(i, j): - return ( + return ( tbl_offset[i] == tbl_offset[j] - and - tbl_id[i] == tbl_id[j] + and tbl_id[i] == tbl_id[j] ) print("creating table...") - create_table(chatdata,max_range) + create_table(chatdata, max_range) print("searching duplicate data...") return [{ - "index" : i, "id" : tbl_id[i], - "offsetTime" : tbl_offset[i], - "type:" : tbl_type[i] - } - for i in range(max_range-1) - if is_duplicate(i,i+1)] + "index": i, "id": tbl_id[i], + "offsetTime": tbl_offset[i], + "type:": tbl_type[i] + } + for i in range(max_range - 1) + if is_duplicate(i, i + 1)] + def remove_duplicate_head(blocks): if len(blocks) == 0 or len(blocks) == 1: @@ -77,64 +76,62 @@ def remove_duplicate_head(blocks): if len(blocks[index].chat_data) == 0: return True - elif len(blocks[index+1].chat_data) == 0: + elif len(blocks[index + 1].chat_data) == 0: return False - + id_0 = parser.get_id(blocks[index].chat_data[0]) - id_1 = parser.get_id(blocks[index+1].chat_data[0]) + id_1 = parser.get_id(blocks[index + 1].chat_data[0]) type_0 = parser.get_type(blocks[index].chat_data[0]) - type_1 = parser.get_type(blocks[index+1].chat_data[0]) + type_1 = parser.get_type(blocks[index + 1].chat_data[0]) return ( - blocks[index].first == blocks[index+1].first - and - id_0 == id_1 - and - type_0 == type_1 + blocks[index].first == blocks[index + 1].first + and id_0 == id_1 + and type_0 == type_1 ) - ret = [blocks[i] for i in range(len(blocks)-1) - if (len(blocks[i].chat_data)>0 and - not is_duplicate_head(i) )] + ret = [blocks[i] for i in range(len(blocks) - 1) + if (len(blocks[i].chat_data) > 0 + and not is_duplicate_head(i))] ret.append(blocks[-1]) return ret + def remove_duplicate_tail(blocks): if len(blocks) == 0 or len(blocks) == 1: - return blocks + return blocks def is_duplicate_tail(index): if len(blocks[index].chat_data) == 0: return True - elif len(blocks[index-1].chat_data) == 0: + elif len(blocks[index - 1].chat_data) == 0: return False - id_0 = parser.get_id(blocks[index-1].chat_data[-1]) + id_0 = parser.get_id(blocks[index - 1].chat_data[-1]) id_1 = parser.get_id(blocks[index].chat_data[-1]) - type_0 = parser.get_type(blocks[index-1].chat_data[-1]) + type_0 = parser.get_type(blocks[index - 1].chat_data[-1]) type_1 = parser.get_type(blocks[index].chat_data[-1]) return ( - blocks[index-1].last == blocks[index].last - and - id_0 == id_1 - and - type_0 == type_1 + blocks[index - 1].last == blocks[index].last + and id_0 == id_1 + and type_0 == type_1 ) - ret = [blocks[i] for i in range(0,len(blocks)) - if i == 0 or not is_duplicate_tail(i) ] + ret = [blocks[i] for i in range(0, len(blocks)) + if i == 0 or not is_duplicate_tail(i)] return ret + def remove_overlap(blocks): """ Fix overlapped blocks after ready_blocks(). - Align the last offset of each block to the first offset + Align the last offset of each block to the first offset of next block (equals `end` offset of each block). """ if len(blocks) == 0 or len(blocks) == 1: - return blocks + return blocks for block in blocks: if block.is_last: break - if len(block.chat_data)==0: + if len(block.chat_data) == 0: continue block_end = block.end if block.last >= block_end: @@ -143,14 +140,14 @@ def remove_overlap(blocks): break block.chat_data.pop() block.last = parser.get_offset(line) - block.remaining=0 - block.done=True + block.remaining = 0 + block.done = True block.continuation = None return blocks - - + def _dump(blocks): - print(f"---------- first last end---") - for i,block in enumerate(blocks): - print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}") \ No newline at end of file + print("---------- first last end---") + for i, block in enumerate(blocks): + print( + f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}") diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py index b3721b9..1110e14 100644 --- a/pytchat/tool/extract/extractor.py +++ b/pytchat/tool/extract/extractor.py @@ -1,16 +1,16 @@ from . import asyncdl -from . import duplcheck -from . import parser +from . import duplcheck from .. videoinfo import VideoInfo from ... import config from ... exceptions import InvalidVideoIdException logger = config.logger(__name__) -headers=config.headers +headers = config.headers + class Extractor: - def __init__(self, video_id, div = 1, callback = None, processor = None): - if not isinstance(div ,int) or div < 1: + def __init__(self, video_id, div=1, callback=None, processor=None): + if not isinstance(div, int) or div < 1: raise ValueError('div must be positive integer.') elif div > 10: div = 10 @@ -33,7 +33,7 @@ class Extractor: blocks = asyncdl.ready_blocks( self.video_id, self.duration, self.div, self.callback) self.blocks = [block for block in blocks if block] - return self + return self def _remove_duplicate_head(self): self.blocks = duplcheck.remove_duplicate_head(self.blocks) @@ -41,10 +41,10 @@ class Extractor: def _set_block_end(self): if len(self.blocks) > 0: - for i in range(len(self.blocks)-1): - self.blocks[i].end = self.blocks[i+1].first - self.blocks[-1].end = self.duration*1000 - self.blocks[-1].is_last =True + for i in range(len(self.blocks) - 1): + self.blocks[i].end = self.blocks[i + 1].first + self.blocks[-1].end = self.duration * 1000 + self.blocks[-1].is_last = True return self def _remove_overlap(self): @@ -62,7 +62,7 @@ class Extractor: def _combine(self): ret = [] for block in self.blocks: - ret.extend(block.chat_data) + ret.extend(block.chat_data) return ret def _execute_extract_operations(self): @@ -82,11 +82,12 @@ class Extractor: return [] data = self._execute_extract_operations() if self.processor is None: - return data + return data return self.processor.process( - [{'video_id':None,'timeout':1,'chatdata' : (action - ["replayChatItemAction"]["actions"][0] for action in data)}] - ) + [{'video_id': None, + 'timeout': 1, + 'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}] + ) def cancel(self): - asyncdl.cancel() \ No newline at end of file + asyncdl.cancel() diff --git a/pytchat/tool/extract/parser.py b/pytchat/tool/extract/parser.py index 806f9c8..9dc6989 100644 --- a/pytchat/tool/extract/parser.py +++ b/pytchat/tool/extract/parser.py @@ -1,12 +1,12 @@ -import json from ... import config -from ... exceptions import ( - ResponseContextError, - NoContentsException, - NoContinuationsException ) +from ... exceptions import ( + ResponseContextError, + NoContentsException, + NoContinuationsException) logger = config.logger(__name__) + def parse(jsn): """ Parse replay chat data. @@ -20,12 +20,12 @@ def parse(jsn): actions : list """ - if jsn is None: + if jsn is None: raise ValueError("parameter JSON is None") if jsn['response']['responseContext'].get('errors'): raise ResponseContextError( - 'video_id is invalid or private/deleted.') - contents=jsn['response'].get('continuationContents') + 'video_id is invalid or private/deleted.') + contents = jsn['response'].get('continuationContents') if contents is None: raise NoContentsException('No chat data.') @@ -43,12 +43,12 @@ def parse(jsn): def get_offset(item): return int(item['replayChatItemAction']["videoOffsetTimeMsec"]) + def get_id(item): return list((list(item['replayChatItemAction']["actions"][0].values() - )[0])['item'].values())[0].get('id') + )[0])['item'].values())[0].get('id') + def get_type(item): return list((list(item['replayChatItemAction']["actions"][0].values() - )[0])['item'].keys())[0] - - + )[0])['item'].keys())[0] diff --git a/pytchat/tool/extract/patch.py b/pytchat/tool/extract/patch.py index 83a2e6d..307bd0b 100644 --- a/pytchat/tool/extract/patch.py +++ b/pytchat/tool/extract/patch.py @@ -2,17 +2,19 @@ from . import parser from . block import Block from typing import NamedTuple + class Patch(NamedTuple): """ Patch represents chunk of chat data which is fetched by asyncdl.fetch_patch._fetch(). """ - chats : list = [] - continuation : str = None - first : int = None - last : int = None + chats: list = [] + continuation: str = None + first: int = None + last: int = None -def fill(block:Block, patch:Patch): + +def fill(block: Block, patch: Patch): block_end = block.end if patch.last < block_end or block.is_last: set_patch(block, patch) @@ -23,32 +25,31 @@ def fill(block:Block, patch:Patch): break patch.chats.pop() set_patch(block, patch._replace( - continuation = None, - last = line_offset - ) + continuation=None, + last=line_offset ) - block.remaining=0 - block.done=True + ) + block.remaining = 0 + block.done = True -def split(parent_block:Block, child_block:Block, patch:Patch): +def split(parent_block: Block, child_block: Block, patch: Patch): parent_block.during_split = False if patch.first <= parent_block.last: ''' When patch overlaps with parent_block, discard this block. ''' child_block.continuation = None - ''' Leave child_block.during_split == True + ''' Leave child_block.during_split == True to exclude from during_split sequence. ''' - return + return child_block.during_split = False child_block.first = patch.first parent_block.end = patch.first fill(child_block, patch) - -def set_patch(block:Block, patch:Patch): + +def set_patch(block: Block, patch: Patch): block.continuation = patch.continuation block.chat_data.extend(patch.chats) block.last = patch.last - block.remaining = block.end-block.last - + block.remaining = block.end - block.last diff --git a/pytchat/tool/extract/worker.py b/pytchat/tool/extract/worker.py index 5ef3ad0..bd23f32 100644 --- a/pytchat/tool/extract/worker.py +++ b/pytchat/tool/extract/worker.py @@ -1,8 +1,8 @@ -from . import parser from . block import Block -from . patch import Patch, fill, split +from . patch import fill, split from ... paramgen import arcparam + class ExtractWorker: """ ExtractWorker associates a download session with a block. @@ -17,18 +17,18 @@ class ExtractWorker: block : Block : Block object that includes chat_data - + blocks : list : List of Block(s) video_id : str : parent_block : Block : - the block from which current block is splitted + the block from which current block is splitted """ __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block'] - def __init__(self, fetch, block, blocks, video_id ): + def __init__(self, fetch, block, blocks, video_id): self.block = block self.fetch = fetch self.blocks = blocks @@ -47,33 +47,35 @@ class ExtractWorker: if self.parent_block: split(self.parent_block, self.block, patch) self.parent_block = None - else: + else: fill(self.block, patch) if self.block.continuation is None: """finished fetching this block """ self.block.done = True self.block = _search_new_block(self) + def _search_new_block(worker) -> Block: index, undone_block = _get_undone_block(worker.blocks) if undone_block is None: - return Block(continuation = None) - mean = (undone_block.last + undone_block.end)/2 - continuation = arcparam.getparam(worker.video_id, seektime = mean/1000) + return Block(continuation=None) + mean = (undone_block.last + undone_block.end) / 2 + continuation = arcparam.getparam(worker.video_id, seektime=mean / 1000) worker.parent_block = undone_block worker.parent_block.during_split = True new_block = Block( - end = undone_block.end, - chat_data = [], - continuation = continuation, - during_split = True, - is_last = worker.parent_block.is_last) + end=undone_block.end, + chat_data=[], + continuation=continuation, + during_split=True, + is_last=worker.parent_block.is_last) '''swap last block''' if worker.parent_block.is_last: worker.parent_block.is_last = False - worker.blocks.insert(index+1, new_block) + worker.blocks.insert(index + 1, new_block) return new_block + def _get_undone_block(blocks) -> (int, Block): min_interval_ms = 120000 max_remaining = 0 diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index 314a2e0..13712dc 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -1,15 +1,14 @@ -import json +import json import re import requests from .. import config -from .. import util -from ..exceptions import InvalidVideoIdException +from ..exceptions import InvalidVideoIdException headers = config.headers pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);") -item_channel_id =[ +item_channel_id = [ "videoDetails", "embeddedPlayerOverlayVideoDetailsRenderer", "channelThumbnailEndpoint", @@ -29,7 +28,7 @@ item_response = [ "embedded_player_response" ] -item_author_image =[ +item_author_image = [ "videoDetails", "embeddedPlayerOverlayVideoDetailsRenderer", "channelThumbnail", @@ -63,6 +62,7 @@ item_moving_thumbnail = [ "url" ] + class VideoInfo: ''' VideoInfo object retrieves YouTube video information. @@ -76,6 +76,7 @@ class VideoInfo: InvalidVideoIdException : Occurs when video_id does not exist on YouTube. ''' + def __init__(self, video_id): self.video_id = video_id text = self._get_page_text(video_id) @@ -83,13 +84,13 @@ class VideoInfo: def _get_page_text(self, video_id): url = f"https://www.youtube.com/embed/{video_id}" - resp = requests.get(url, headers = headers) + resp = requests.get(url, headers=headers) resp.raise_for_status() return resp.text def _parse(self, text): result = re.search(pattern, text) - res= json.loads(result.group(1)) + res = json.loads(result.group(1)) response = self._get_item(res, item_response) if response is None: self._check_video_is_private(res.get("args")) @@ -98,7 +99,7 @@ class VideoInfo: raise InvalidVideoIdException( f"No renderer found in video_id: [{self.video_id}].") - def _check_video_is_private(self,args): + def _check_video_is_private(self, args): if args and args.get("video_id"): raise InvalidVideoIdException( f"video_id [{self.video_id}] is private or deleted.") @@ -130,8 +131,8 @@ class VideoInfo: def get_title(self): if self._renderer.get("title"): - return [''.join(run["text"]) - for run in self._renderer["title"]["runs"]][0] + return [''.join(run["text"]) + for run in self._renderer["title"]["runs"]][0] return None def get_channel_id(self): @@ -141,13 +142,13 @@ class VideoInfo: return None def get_author_image(self): - return self._get_item(self._renderer, item_author_image) + return self._get_item(self._renderer, item_author_image) def get_thumbnail(self): return self._get_item(self._renderer, item_thumbnail) def get_channel_name(self): return self._get_item(self._renderer, item_channel_name) - + def get_moving_thumbnail(self): return self._get_item(self._renderer, item_moving_thumbnail) diff --git a/pytchat/util/__init__.py b/pytchat/util/__init__.py index 9b9d1ab..9050d65 100644 --- a/pytchat/util/__init__.py +++ b/pytchat/util/__init__.py @@ -1,15 +1,18 @@ -import requests,json,datetime +import requests +import json +import datetime from .. import config + def extract(url): _session = requests.Session() html = _session.get(url, headers=config.headers) with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S') - )+'test.json',mode ='w',encoding='utf-8') as f: - json.dump(html.json(),f,ensure_ascii=False) + ) + 'test.json', mode='w', encoding='utf-8') as f: + json.dump(html.json(), f, ensure_ascii=False) -def save(data,filename,extention): - with open(filename+"_"+(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S') - )+extention,mode ='w',encoding='utf-8') as f: +def save(data, filename, extention): + with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention, + mode='w', encoding='utf-8') as f: f.writelines(data)