diff --git a/README.md b/README.md index 9c2267b..2d28adb 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,7 @@ pytchat is a python library for fetching youtube live chat. ## Description pytchat is a python library for fetching youtube live chat -without using youtube api, Selenium or BeautifulSoup. - -pytchatは、YouTubeチャットを閲覧するためのpythonライブラリです。 +without using Selenium or BeautifulSoup. Other features: + Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one. @@ -16,7 +14,7 @@ Other features: instead of web scraping. For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki).
-より詳細な解説は[wiki](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)を参照してください。 +[wiki (Japanese)](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp) ## Install ```python @@ -27,128 +25,55 @@ pip install pytchat ### CLI One-liner command. -Save chat data to html, with embedded custom emojis. +Save chat data to html with embedded custom emojis. +Show chat stream (--echo option). ```bash -$ pytchat -v https://www.youtube.com/watch?v=ZJ6Q4U_Vg6s -o "c:/temp/" +$ pytchat -v https://www.youtube.com/watch?v=uIx8l2xlYVY -o "c:/temp/" # options: # -v : Video ID or URL that includes ID # -o : output directory (default path: './') +# --echo : Show chats. # saved filename is [video_id].html ``` -### on-demand mode +### On-demand mode with simple non-buffered object. ```python -from pytchat import LiveChat -livechat = LiveChat(video_id = "Zvp1pJpie4I") -# It is also possible to specify a URL that includes the video ID: -# livechat = LiveChat("https://www.youtube.com/watch?v=Zvp1pJpie4I") -while livechat.is_alive(): - try: - chatdata = livechat.get() - for c in chatdata.items: - print(f"{c.datetime} [{c.author.name}]- {c.message}") - chatdata.tick() - except KeyboardInterrupt: - livechat.terminate() - break -``` - -### callback mode -```python -from pytchat import LiveChat -import time - -def main(): - livechat = LiveChat(video_id = "Zvp1pJpie4I", callback = disp) - while livechat.is_alive(): - #other background operation. - time.sleep(1) - livechat.terminate() - -#callback function (automatically called) -def disp(chatdata): - for c in chatdata.items: - print(f"{c.datetime} [{c.author.name}]- {c.message}") - chatdata.tick() - -if __name__ == '__main__': - main() - -``` - -### asyncio context: -```python -from pytchat import LiveChatAsync -from concurrent.futures import CancelledError -import asyncio - -async def main(): - livechat = LiveChatAsync("Zvp1pJpie4I", callback = func) - while livechat.is_alive(): - #other background operation. - await asyncio.sleep(3) - -#callback function is automatically called. -async def func(chatdata): - for c in chatdata.items: - print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}") - await chatdata.tick_async() - -if __name__ == '__main__': - try: - loop = asyncio.get_event_loop() - loop.run_until_complete(main()) - except CancelledError: - pass -``` - - -### youtube api compatible processor: -```python -from pytchat import LiveChat, CompatibleProcessor -import time - -chat = LiveChat("Zvp1pJpie4I", - processor = CompatibleProcessor() ) - +import pytchat +chat = pytchat.create(video_id="uIx8l2xlYVY") while chat.is_alive(): - try: - data = chat.get() - polling = data['pollingIntervalMillis']/1000 - for c in data['items']: - if c.get('snippet'): - print(f"[{c['authorDetails']['displayName']}]" - f"-{c['snippet']['displayMessage']}") - time.sleep(polling/len(data['items'])) - except KeyboardInterrupt: - chat.terminate() + for c in chat.get().sync_items(): + print(f"{c.datetime} [{c.author.name}]- {c.message}") ``` -### replay: -If specified video is not live, -automatically try to fetch archived chat data. +### Output JSON format (feature of [DefaultProcessor](DefaultProcessor)) ```python -from pytchat import LiveChat +import pytchat +import time -def main(): - #seektime (seconds): start position of chat. - chat = LiveChat("ojes5ULOqhc", seektime = 60*30) - print('Replay from 30:00') - try: - while chat.is_alive(): - data = chat.get() - for c in data.items: - print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}") - data.tick() - except KeyboardInterrupt: - chat.terminate() - -if __name__ == '__main__': - main() +chat = pytchat.create(video_id="uIx8l2xlYVY") +while chat.is_alive(): + print(chat.get().json()) + time.sleep(5) + ''' + # Each chat item can also be output in JSON format. + for c in chat.get().sync_items(): + print(c.json()) + ''' ``` -### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver). + + +### other +#### Fetch chat with buffer. +[LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat) + +#### Asyncio Context +[LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync) + +#### [YT API compatible chat processor]https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor) + +### [Extract archived chat data](https://github.com/taizan-hokuto/pytchat/wiki/Extractor) ```python from pytchat import HTMLArchiver, Extractor @@ -164,7 +89,7 @@ print("finished.") ``` ## Structure of Default Processor -Each item can be got with `items` function. +Each item can be got with `sync_items()` function. @@ -298,6 +223,9 @@ Most of source code of CLI refer to: [PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader) +Progress bar in CLI is based on: + +[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3) ## Author diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 3343b8c..ebd5725 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,13 +2,28 @@ pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.3.2' +__version__ = '0.4.0' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __url__ = 'https://github.com/taizan-hokuto/pytchat' -__all__ = ["core_async","core_multithread","processors"] + +from .exceptions import ( + ChatParseException, + ResponseContextError, + NoContents, + NoContinuation, + IllegalFunctionCall, + InvalidVideoIdException, + UnknownConnectionError, + RetryExceedMaxCount, + ChatDataFinished, + ReceivedUnknownContinuation, + FailedExtractContinuation, + VideoInfoParseError, + PatternUnmatchError +) from .api import ( cli, @@ -26,7 +41,7 @@ from .api import ( SimpleDisplayProcessor, SpeedCalculator, SuperchatCalculator, - VideoInfo + VideoInfo, + create ) - # flake8: noqa \ No newline at end of file diff --git a/pytchat/api.py b/pytchat/api.py index 7c67436..bf64e07 100644 --- a/pytchat/api.py +++ b/pytchat/api.py @@ -1,5 +1,6 @@ from . import cli from . import config +from .core import create from .core_multithread.livechat import LiveChat from .core_async.livechat import LiveChatAsync from .processors.chat_processor import ChatProcessor @@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator from .tool.extract.extractor import Extractor from .tool.videoinfo import VideoInfo +__all__ = [ + cli, + config, + LiveChat, + LiveChatAsync, + ChatProcessor, + CompatibleProcessor, + DummyProcessor, + DefaultProcessor, + Extractor, + HTMLArchiver, + TSVArchiver, + JsonfileArchiver, + SimpleDisplayProcessor, + SpeedCalculator, + SuperchatCalculator, + VideoInfo, + create +] + # flake8: noqa \ No newline at end of file diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index f8cffd3..0d28485 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError from pathlib import Path from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError from .arguments import Arguments +from .echo import Echo from .progressbar import ProgressBar from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError from .. processors.html_archiver import HTMLArchiver @@ -41,11 +42,13 @@ def main(): help='Save error data when error occurs(".dat" file)') parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', help='Show version') + parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true', + help='Show chats of specified video') Arguments(parser.parse_args().__dict__) if Arguments().print_version: - print(f'pytchat v{__version__} © 2019 taizan-hokuto') + print(f'pytchat v{__version__} © 2019,2020 taizan-hokuto') return # Extractor @@ -53,6 +56,20 @@ def main(): parser.print_help() return + # Echo + if Arguments().echo: + if len(Arguments().video_ids) > 1: + print("You can specify only one video ID.") + return + try: + Echo(Arguments().video_ids[0]).run() + except InvalidVideoIdException as e: + print("Invalid video id:", str(e)) + except Exception as e: + print(type(e), str(e)) + finally: + return + if not os.path.exists(Arguments().output): print("\nThe specified directory does not exist.:{}\n".format(Arguments().output)) return @@ -77,10 +94,13 @@ class Runner: path = util.checkpath(separated_path + video_id + '.html') try: info = VideoInfo(video_id) - except Exception as e: + except (PatternUnmatchError, JSONDecodeError) as e: print("Cannot parse video information.:{} {}".format(video_id, type(e))) if Arguments().save_error_data: - util.save(str(e), "ERR", ".dat") + util.save(str(e.doc), "ERR", ".dat") + continue + except Exception as e: + print("Cannot parse video information.:{} {}".format(video_id, type(e))) continue print(f"\n" diff --git a/pytchat/cli/arguments.py b/pytchat/cli/arguments.py index 0e1baf4..be720c8 100644 --- a/pytchat/cli/arguments.py +++ b/pytchat/cli/arguments.py @@ -19,6 +19,7 @@ class Arguments(metaclass=Singleton): OUTPUT: str = 'output_dir' VIDEO_IDS: str = 'video_id' SAVE_ERROR_DATA: bool = 'save_error_data' + ECHO: bool = 'echo' def __init__(self, arguments: Optional[Dict[str, Union[str, bool, int]]] = None): @@ -36,8 +37,9 @@ class Arguments(metaclass=Singleton): self.output: str = arguments[Arguments.Name.OUTPUT] self.video_ids: List[int] = [] self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA] - + self.echo: bool = arguments[Arguments.Name.ECHO] # Videos + if arguments[Arguments.Name.VIDEO_IDS]: self.video_ids = [video_id for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')] diff --git a/pytchat/cli/echo.py b/pytchat/cli/echo.py new file mode 100644 index 0000000..95876ab --- /dev/null +++ b/pytchat/cli/echo.py @@ -0,0 +1,22 @@ +import pytchat +from ..exceptions import ChatDataFinished, NoContents +from ..util.extract_video_id import extract_video_id + + +class Echo: + def __init__(self, video_id): + self.video_id = extract_video_id(video_id) + + def run(self): + livechat = pytchat.create(self.video_id) + while livechat.is_alive(): + chatdata = livechat.get() + for c in chatdata.sync_items(): + print(f"{c.datetime} [{c.author.name}] {c.message} {c.amountString}") + + try: + livechat.raise_for_status() + except (ChatDataFinished, NoContents): + print("Chat finished.") + except Exception as e: + print(type(e), str(e)) diff --git a/pytchat/cli/progressbar.py b/pytchat/cli/progressbar.py index fe37591..ae6174d 100644 --- a/pytchat/cli/progressbar.py +++ b/pytchat/cli/progressbar.py @@ -1,5 +1,5 @@ ''' -This code for this progress bar is based on +This code is based on vladignatyev/progress.py https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 (MIT License) diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index 3a86a8a..e362819 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -1,4 +1,4 @@ -import logging +import logging # noqa from . import mylogger headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36', diff --git a/pytchat/core/__init__.py b/pytchat/core/__init__.py new file mode 100644 index 0000000..9b98c18 --- /dev/null +++ b/pytchat/core/__init__.py @@ -0,0 +1,7 @@ +from .pytchat import PytchatCore +from .. util.extract_video_id import extract_video_id + + +def create(video_id: str, **kwargs): + _vid = extract_video_id(video_id) + return PytchatCore(_vid, **kwargs) diff --git a/pytchat/core/pytchat.py b/pytchat/core/pytchat.py new file mode 100644 index 0000000..f3a8172 --- /dev/null +++ b/pytchat/core/pytchat.py @@ -0,0 +1,207 @@ +import httpx +import json +import signal +import time +import traceback +import urllib.parse +from ..parser.live import Parser +from .. import config +from .. import exceptions +from ..paramgen import liveparam, arcparam +from ..processors.default.processor import DefaultProcessor +from ..processors.combinator import Combinator +from ..util.extract_video_id import extract_video_id + +headers = config.headers +MAX_RETRY = 10 + + +class PytchatCore: + ''' + + Parameter + --------- + video_id : str + + seektime : int + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. + + processor : ChatProcessor + + interruptable : bool + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. + + force_replay : bool + force to fetch archived chat data, even if specified video is live. + + topchat_only : bool + If True, get only top chat. + + hold_exception : bool [default:True] + If True, when exceptions occur, the exception is held internally, + and can be raised by raise_for_status(). + + Attributes + --------- + _is_alive : bool + Flag to stop getting chat. + ''' + + _setup_finished = False + + def __init__(self, video_id, + seektime=-1, + processor=DefaultProcessor(), + interruptable=True, + force_replay=False, + topchat_only=False, + hold_exception=True, + logger=config.logger(__name__), + ): + self._video_id = extract_video_id(video_id) + self.seektime = seektime + if isinstance(processor, tuple): + self.processor = Combinator(processor) + else: + self.processor = processor + self._is_alive = True + self._is_replay = force_replay + self._hold_exception = hold_exception + self._exception_holder = None + self._parser = Parser( + is_replay=self._is_replay, + exception_holder=self._exception_holder + ) + self._first_fetch = True + self._fetch_url = "live_chat/get_live_chat?continuation=" + self._topchat_only = topchat_only + self._logger = logger + if interruptable: + signal.signal(signal.SIGINT, lambda a, b: self.terminate()) + self._setup() + + def _setup(self): + time.sleep(0.1) # sleep shortly to prohibit skipping fetching data + """Fetch first continuation parameter, + create and start _listen loop. + """ + self.continuation = liveparam.getparam(self._video_id, 3) + + def _get_chat_component(self): + + ''' Fetch chat data and store them into buffer, + get next continuaiton parameter and loop. + + Parameter + --------- + continuation : str + parameter for next chat data + ''' + try: + with httpx.Client(http2=True) as client: + if self.continuation and self._is_alive: + contents = self._get_contents(self.continuation, client, headers) + metadata, chatdata = self._parser.parse(contents) + timeout = metadata['timeoutMs'] / 1000 + chat_component = { + "video_id": self._video_id, + "timeout": timeout, + "chatdata": chatdata + } + self.continuation = metadata.get('continuation') + return chat_component + except exceptions.ChatParseException as e: + self._logger.debug(f"[{self._video_id}]{str(e)}") + self._raise_exception(e) + except (TypeError, json.JSONDecodeError) as e: + self._logger.error(f"{traceback.format_exc(limit=-1)}") + self._raise_exception(e) + + self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._raise_exception(exceptions.ChatDataFinished) + + def _get_contents(self, continuation, client, headers): + '''Get 'continuationContents' from livechat json. + If contents is None at first fetching, + try to fetch archive chat data. + + Return: + ------- + 'continuationContents' which includes metadata & chat data. + ''' + livechat_json = ( + self._get_livechat_json(continuation, client, headers) + ) + contents = self._parser.get_contents(livechat_json) + if self._first_fetch: + if contents is None or self._is_replay: + '''Try to fetch archive chat data.''' + self._parser.is_replay = True + self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" + continuation = arcparam.getparam( + self._video_id, self.seektime, self._topchat_only) + livechat_json = (self._get_livechat_json(continuation, client, headers)) + reload_continuation = self._parser.reload_continuation( + self._parser.get_contents(livechat_json)) + if reload_continuation: + livechat_json = (self._get_livechat_json( + reload_continuation, client, headers)) + contents = self._parser.get_contents(livechat_json) + self._is_replay = True + self._first_fetch = False + return contents + + def _get_livechat_json(self, continuation, client, headers): + ''' + Get json which includes chat data. + ''' + continuation = urllib.parse.quote(continuation) + livechat_json = None + err = None + url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" + for _ in range(MAX_RETRY + 1): + with client: + try: + livechat_json = client.get(url, headers=headers).json() + break + except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e: + err = e + time.sleep(2) + continue + else: + self._logger.error(f"[{self._video_id}]" + f"Exceeded retry count. Last error: {str(err)}") + self._raise_exception(exceptions.RetryExceedMaxCount()) + return livechat_json + + def get(self): + if self.is_alive(): + chat_component = self._get_chat_component() + return self.processor.process([chat_component]) + else: + return [] + + def is_replay(self): + return self._is_replay + + def is_alive(self): + return self._is_alive + + def terminate(self): + self._is_alive = False + self.processor.finalize() + + def raise_for_status(self): + if self._exception_holder is not None: + raise self._exception_holder + + def _raise_exception(self, exception: Exception = None): + self._is_alive = False + if self._hold_exception is False: + raise exception + self._exception_holder = exception diff --git a/pytchat/core_async/buffer.py b/pytchat/core_async/buffer.py index 9fbaac9..e985088 100644 --- a/pytchat/core_async/buffer.py +++ b/pytchat/core_async/buffer.py @@ -4,13 +4,13 @@ import asyncio class Buffer(asyncio.Queue): ''' - チャットデータを格納するバッファの役割を持つFIFOキュー + Buffer for storing chat data. Parameter --------- maxsize : int - 格納するチャットブロックの最大個数。0の場合は無限。 - 最大値を超える場合は古いチャットブロックから破棄される。 + Maximum number of chat blocks to be stored. + If it exceeds the maximum, the oldest chat block will be discarded. ''' def __init__(self, maxsize=0): diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 9ec5512..5dccb4c 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -22,54 +22,51 @@ MAX_RETRY = 10 class LiveChatAsync: - '''asyncioを利用してYouTubeのライブ配信のチャットデータを取得する。 + '''LiveChatAsync object fetches chat data and stores them + in a buffer with asyncio. Parameter --------- video_id : str - 動画ID seektime : int - (ライブチャット取得時は無視) - 取得開始するアーカイブ済みチャットの経過時間(秒) - マイナス値を指定した場合は、配信開始前のチャットも取得する。 + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. processor : ChatProcessor - チャットデータを加工するオブジェクト - buffer : Buffer(maxsize:20[default]) - チャットデータchat_componentを格納するバッファ。 - maxsize : 格納できるchat_componentの個数 - default値20個。1個で約5~10秒分。 + buffer : Buffer + buffer of chat data fetched background. interruptable : bool - Ctrl+Cによる処理中断を行うかどうか。 + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. callback : func - _listen()関数から一定間隔で自動的に呼びだす関数。 + function called periodically from _listen(). done_callback : func - listener終了時に呼び出すコールバック。 + function called when listener ends. exception_handler : func - 例外を処理する関数 direct_mode : bool - Trueの場合、bufferを使わずにcallbackを呼ぶ。 - Trueの場合、callbackの設定が必須 - (設定していない場合IllegalFunctionCall例外を発生させる) + If True, invoke specified callback function without using buffer. + callback is required. If not, IllegalFunctionCall will be raised. force_replay : bool - Trueの場合、ライブチャットが取得できる場合であっても - 強制的にアーカイブ済みチャットを取得する。 + force to fetch archived chat data, even if specified video is live. topchat_only : bool - Trueの場合、上位チャットのみ取得する。 + If True, get only top chat. Attributes --------- _is_alive : bool - チャット取得を停止するためのフラグ + Flag to stop getting chat. ''' _setup_finished = False @@ -114,31 +111,30 @@ class LiveChatAsync: self._set_exception_handler(exception_handler) if interruptable: signal.signal(signal.SIGINT, - (lambda a, b: asyncio.create_task( - LiveChatAsync.shutdown(None, signal.SIGINT, b)))) + (lambda a, b: self._keyboard_interrupt())) self._setup() def _setup(self): - # direct modeがTrueでcallback未設定の場合例外発生。 + # An exception is raised when direct mode is true and no callback is set. if self._direct_mode: if self._callback is None: raise exceptions.IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: - # direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 + # Create a default buffer if `direct_mode` is False and buffer is not set. if self._buffer is None: self._buffer = Buffer(maxsize=20) - # callbackが指定されている場合はcallbackを呼ぶループタスクを作成 + # Create a loop task to call callback if the `callback` param is specified. if self._callback is None: pass else: - # callbackを呼ぶループタスクの開始 + # Create a loop task to call callback if the `callback` param is specified. loop = asyncio.get_event_loop() loop.create_task(self._callback_loop(self._callback)) - # _listenループタスクの開始 + # Start a loop task for _listen() loop = asyncio.get_event_loop() self.listen_task = loop.create_task(self._startlisten()) - # add_done_callbackの登録 + # Register add_done_callback if self._done_callback is None: self.listen_task.add_done_callback(self._finish) else: @@ -194,7 +190,7 @@ class LiveChatAsync: self._logger.error(f"{traceback.format_exc(limit = -1)}") raise - self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._logger.debug(f"[{self._video_id}] finished fetching chat.") raise exceptions.ChatDataFinished async def _check_pause(self, continuation): @@ -246,30 +242,30 @@ class LiveChatAsync: ''' continuation = urllib.parse.quote(continuation) livechat_json = None - status_code = 0 url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" for _ in range(MAX_RETRY + 1): try: resp = await client.get(url, headers=headers) livechat_json = resp.json() break - except (httpx.HTTPError, json.JSONDecodeError): + except (json.JSONDecodeError, httpx.HTTPError): await asyncio.sleep(1) continue else: self._logger.error(f"[{self._video_id}]" - f"Exceeded retry count. status_code={status_code}") + f"Exceeded retry count.") return None return livechat_json async def _callback_loop(self, callback): - """ コンストラクタでcallbackを指定している場合、バックグラウンドで - callbackに指定された関数に一定間隔でチャットデータを投げる。 + """ If a callback is specified in the constructor, + it throws chat data at regular intervals to the + function specified in the callback in the backgroun Parameter --------- callback : func - 加工済みのチャットデータを渡す先の関数。 + function to which the processed chat data is passed. """ while self.is_alive(): items = await self._buffer.get() @@ -280,11 +276,13 @@ class LiveChatAsync: await self._callback(processed_chat) async def get(self): - """ bufferからデータを取り出し、processorに投げ、 - 加工済みのチャットデータを返す。 + """ + Retrieves data from the buffer, + throws it to the processor, + and returns the processed chat data. Returns - : Processorによって加工されたチャットデータ + : Chat data processed by the Processor """ if self._callback is None: if self.is_alive(): @@ -293,7 +291,7 @@ class LiveChatAsync: else: return [] raise exceptions.IllegalFunctionCall( - "既にcallbackを登録済みのため、get()は実行できません。") + "Callback parameter is already set, so get() cannot be performed.") def is_replay(self): return self._is_replay @@ -314,11 +312,11 @@ class LiveChatAsync: return self._is_alive def _finish(self, sender): - '''Listener終了時のコールバック''' + '''Called when the _listen() task finished.''' try: self._task_finished() except CancelledError: - self._logger.debug(f'[{self._video_id}]cancelled:{sender}') + self._logger.debug(f'[{self._video_id}] cancelled:{sender}') def terminate(self): if self._pauser.empty(): @@ -326,10 +324,14 @@ class LiveChatAsync: self._is_alive = False self._buffer.put_nowait({}) self.processor.finalize() - + + def _keyboard_interrupt(self): + self.exception = exceptions.ChatDataFinished() + self.terminate() + def _task_finished(self): ''' - Listenerを終了する。 + Terminate fetching chats. ''' if self.is_alive(): self.terminate() @@ -339,7 +341,7 @@ class LiveChatAsync: self.exception = e if not isinstance(e, exceptions.ChatParseException): self._logger.error(f'Internal exception - {type(e)}{str(e)}') - self._logger.info(f'[{self._video_id}]終了しました') + self._logger.info(f'[{self._video_id}] finished.') def raise_for_status(self): if self.exception is not None: @@ -349,15 +351,3 @@ class LiveChatAsync: def _set_exception_handler(cls, handler): loop = asyncio.get_event_loop() loop.set_exception_handler(handler) - - @classmethod - async def shutdown(cls, event, sig=None, handler=None): - cls._logger.debug("shutdown...") - tasks = [t for t in asyncio.all_tasks() if t is not - asyncio.current_task()] - [task.cancel() for task in tasks] - - cls._logger.debug("complete remaining tasks...") - await asyncio.gather(*tasks, return_exceptions=True) - loop = asyncio.get_event_loop() - loop.stop() diff --git a/pytchat/core_multithread/buffer.py b/pytchat/core_multithread/buffer.py index 966f2e9..8ead646 100644 --- a/pytchat/core_multithread/buffer.py +++ b/pytchat/core_multithread/buffer.py @@ -4,13 +4,13 @@ import queue class Buffer(queue.Queue): ''' - チャットデータを格納するバッファの役割を持つFIFOキュー + Buffer for storing chat data. Parameter --------- - max_size : int - 格納するチャットブロックの最大個数。0の場合は無限。 - 最大値を超える場合は古いチャットブロックから破棄される。 + maxsize : int + Maximum number of chat blocks to be stored. + If it exceeds the maximum, the oldest chat block will be discarded. ''' def __init__(self, maxsize=0): diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index f096d3f..8e025c2 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -21,54 +21,53 @@ MAX_RETRY = 10 class LiveChat: - ''' スレッドプールを利用してYouTubeのライブ配信のチャットデータを取得する + ''' + LiveChat object fetches chat data and stores them + in a buffer with ThreadpoolExecutor. Parameter --------- video_id : str - 動画ID seektime : int - (ライブチャット取得時は無視) - 取得開始するアーカイブ済みチャットの経過時間(秒) - マイナス値を指定した場合は、配信開始前のチャットも取得する。 + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. processor : ChatProcessor - チャットデータを加工するオブジェクト - buffer : Buffer(maxsize:20[default]) - チャットデータchat_componentを格納するバッファ。 - maxsize : 格納できるchat_componentの個数 - default値20個。1個で約5~10秒分。 + buffer : Buffer + buffer of chat data fetched background. interruptable : bool - Ctrl+Cによる処理中断を行うかどうか。 + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. callback : func - _listen()関数から一定間隔で自動的に呼びだす関数。 + function called periodically from _listen(). done_callback : func - listener終了時に呼び出すコールバック。 + function called when listener ends. direct_mode : bool - Trueの場合、bufferを使わずにcallbackを呼ぶ。 - Trueの場合、callbackの設定が必須 - (設定していない場合IllegalFunctionCall例外を発生させる) + If True, invoke specified callback function without using buffer. + callback is required. If not, IllegalFunctionCall will be raised. force_replay : bool - Trueの場合、ライブチャットが取得できる場合であっても - 強制的にアーカイブ済みチャットを取得する。 + force to fetch archived chat data, even if specified video is live. topchat_only : bool - Trueの場合、上位チャットのみ取得する。 + If True, get only top chat. Attributes --------- _executor : ThreadPoolExecutor - チャットデータ取得ループ(_listen)用のスレッド + This is used for _listen() loop. _is_alive : bool - チャット取得を停止するためのフラグ + Flag to stop getting chat. ''' _setup_finished = False @@ -112,24 +111,24 @@ class LiveChat: self._setup() def _setup(self): - # direct modeがTrueでcallback未設定の場合例外発生。 + # An exception is raised when direct mode is true and no callback is set. if self._direct_mode: if self._callback is None: raise exceptions.IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: - # direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 + # Create a default buffer if `direct_mode` is False and buffer is not set. if self._buffer is None: self._buffer = Buffer(maxsize=20) - # callbackが指定されている場合はcallbackを呼ぶループタスクを作成 + # Create a loop task to call callback if the `callback` param is specified. if self._callback is None: pass else: - # callbackを呼ぶループタスクの開始 + # Start a loop task calling callback function. self._executor.submit(self._callback_loop, self._callback) - # _listenループタスクの開始 + # Start a loop task for _listen() self.listen_task = self._executor.submit(self._startlisten) - # add_done_callbackの登録 + # Register add_done_callback if self._done_callback is None: self.listen_task.add_done_callback(self._finish) else: @@ -184,7 +183,7 @@ class LiveChat: self._logger.error(f"{traceback.format_exc(limit=-1)}") raise - self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._logger.debug(f"[{self._video_id}] finished fetching chat.") raise exceptions.ChatDataFinished def _check_pause(self, continuation): @@ -236,30 +235,30 @@ class LiveChat: ''' continuation = urllib.parse.quote(continuation) livechat_json = None - status_code = 0 url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" for _ in range(MAX_RETRY + 1): with client: try: livechat_json = client.get(url, headers=headers).json() break - except json.JSONDecodeError: - time.sleep(1) + except (json.JSONDecodeError, httpx.HTTPError): + time.sleep(2) continue else: self._logger.error(f"[{self._video_id}]" - f"Exceeded retry count. status_code={status_code}") + f"Exceeded retry count.") raise exceptions.RetryExceedMaxCount() return livechat_json def _callback_loop(self, callback): - """ コンストラクタでcallbackを指定している場合、バックグラウンドで - callbackに指定された関数に一定間隔でチャットデータを投げる。 + """ If a callback is specified in the constructor, + it throws chat data at regular intervals to the + function specified in the callback in the backgroun Parameter --------- callback : func - 加工済みのチャットデータを渡す先の関数。 + function to which the processed chat data is passed. """ while self.is_alive(): items = self._buffer.get() @@ -270,11 +269,13 @@ class LiveChat: self._callback(processed_chat) def get(self): - """ bufferからデータを取り出し、processorに投げ、 - 加工済みのチャットデータを返す。 + """ + Retrieves data from the buffer, + throws it to the processor, + and returns the processed chat data. Returns - : Processorによって加工されたチャットデータ + : Chat data processed by the Processor """ if self._callback is None: if self.is_alive(): @@ -283,7 +284,7 @@ class LiveChat: else: return [] raise exceptions.IllegalFunctionCall( - "既にcallbackを登録済みのため、get()は実行できません。") + "Callback parameter is already set, so get() cannot be performed.") def is_replay(self): return self._is_replay @@ -304,13 +305,16 @@ class LiveChat: return self._is_alive def _finish(self, sender): - '''Listener終了時のコールバック''' + '''Called when the _listen() task finished.''' try: self._task_finished() except CancelledError: - self._logger.debug(f'[{self._video_id}]cancelled:{sender}') + self._logger.debug(f'[{self._video_id}] cancelled:{sender}') def terminate(self): + ''' + Terminate fetching chats. + ''' if self._pauser.empty(): self._pauser.put_nowait(None) self._is_alive = False @@ -319,9 +323,6 @@ class LiveChat: self.processor.finalize() def _task_finished(self): - ''' - Listenerを終了する。 - ''' if self.is_alive(): self.terminate() try: @@ -330,7 +331,7 @@ class LiveChat: self.exception = e if not isinstance(e, exceptions.ChatParseException): self._logger.error(f'Internal exception - {type(e)}{str(e)}') - self._logger.info(f'[{self._video_id}]終了しました') + self._logger.info(f'[{self._video_id}] finished.') def raise_for_status(self): if self.exception is not None: diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index 1bd918c..4212fdc 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -76,6 +76,6 @@ class PatternUnmatchError(VideoInfoParseError): ''' Thrown when failed to parse video info with unmatched pattern. ''' - def __init__(self, doc): + def __init__(self, doc=''): self.msg = "PatternUnmatchError" self.doc = doc diff --git a/pytchat/paramgen/arcparam_mining.py b/pytchat/paramgen/arcparam_mining.py deleted file mode 100644 index 7e3525a..0000000 --- a/pytchat/paramgen/arcparam_mining.py +++ /dev/null @@ -1,133 +0,0 @@ -from base64 import urlsafe_b64encode as b64enc -from functools import reduce -import urllib.parse - -''' -Generate continuation parameter of youtube replay chat. - -Author: taizan-hokuto (2019) @taizan205 - -ver 0.0.1 2019.10.05 -''' - - -def _gen_vid_long(video_id): - """generate video_id parameter. - Parameter - --------- - video_id : str - - Return - --------- - byte[] : base64 encoded video_id parameter. - """ - header_magic = b'\x0A\x0F\x1A\x0D\x0A' - header_id = video_id.encode() - header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B' - header_terminator = b'\x20\x01' - - item = [ - header_magic, - _nval(len(header_id)), - header_id, - header_sep_1, - header_id, - header_terminator - ] - - return urllib.parse.quote( - b64enc(reduce(lambda x, y: x + y, item)).decode() - ).encode() - - -def _gen_vid(video_id): - """generate video_id parameter. - Parameter - --------- - video_id : str - - Return - --------- - bytes : base64 encoded video_id parameter. - """ - header_magic = b'\x0A\x0F\x1A\x0D\x0A' - header_id = video_id.encode() - header_terminator = b'\x20\x01' - - item = [ - header_magic, - _nval(len(header_id)), - header_id, - header_terminator - ] - - return urllib.parse.quote( - b64enc(reduce(lambda x, y: x + y, item)).decode() - ).encode() - - -def _nval(val): - """convert value to byte array""" - if val < 0: - raise ValueError - buf = b'' - while val >> 7: - m = val & 0xFF | 0x80 - buf += m.to_bytes(1, 'big') - val >>= 7 - buf += val.to_bytes(1, 'big') - return buf - - -def _build(video_id, seektime, topchat_only): - switch_01 = b'\x04' if topchat_only else b'\x01' - if seektime < 0: - raise ValueError("seektime must be greater than or equal to zero.") - if seektime == 0: - times = b'' - else: - times = _nval(int(seektime * 1000)) - if seektime > 0: - _len_time = b'\x5A' + (len(times) + 1).to_bytes(1, 'big') + b'\x10' - else: - _len_time = b'' - - header_magic = b'\xA2\x9D\xB0\xD3\x04' - sep_0 = b'\x1A' - vid = _gen_vid(video_id) - _tag = b'\x40\x01' - timestamp1 = times - sep_1 = b'\x60\x04\x72\x02\x08' - terminator = b'\x78\x01' - - body = [ - sep_0, - _nval(len(vid)), - vid, - _tag, - _len_time, - timestamp1, - sep_1, - switch_01, - terminator - ] - - body = reduce(lambda x, y: x + y, body) - - return urllib.parse.quote( - b64enc(header_magic + _nval(len(body)) + body - ).decode() - ) - - -def getparam(video_id, seektime=0.0, topchat_only=False): - ''' - Parameter - --------- - seektime : int - unit:seconds - start position of fetching chat data. - topchat_only : bool - if True, fetch only 'top chat' - ''' - return _build(video_id, seektime, topchat_only) diff --git a/pytchat/parser/live.py b/pytchat/parser/live.py index 13540cb..c3e10b3 100644 --- a/pytchat/parser/live.py +++ b/pytchat/parser/live.py @@ -8,15 +8,26 @@ from .. import exceptions class Parser: + ''' + Parser of chat json. + + Parameter + ---------- + is_replay : bool - __slots__ = ['is_replay'] + exception_holder : Object [default:Npne] + The object holding exceptions. + This is passed from the parent livechat object. + ''' + __slots__ = ['is_replay', 'exception_holder'] - def __init__(self, is_replay): + def __init__(self, is_replay, exception_holder=None): self.is_replay = is_replay + self.exception_holder = exception_holder def get_contents(self, jsn): if jsn is None: - raise exceptions.IllegalFunctionCall('Called with none JSON object.') + self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.')) if jsn['response']['responseContext'].get('errors'): raise exceptions.ResponseContextError( 'The video_id would be wrong, or video is deleted or private.') @@ -42,11 +53,11 @@ class Parser: if contents is None: '''Broadcasting end or cannot fetch chat stream''' - raise exceptions.NoContents('Chat data stream is empty.') + self.raise_exception(exceptions.NoContents('Chat data stream is empty.')) cont = contents['liveChatContinuation']['continuations'][0] if cont is None: - raise exceptions.NoContinuation('No Continuation') + self.raise_exception(exceptions.NoContinuation('No Continuation')) metadata = (cont.get('invalidationContinuationData') or cont.get('timedContinuationData') or cont.get('reloadContinuationData') @@ -54,13 +65,13 @@ class Parser: ) if metadata is None: if cont.get("playerSeekContinuationData"): - raise exceptions.ChatDataFinished('Finished chat data') + self.raise_exception(exceptions.ChatDataFinished('Finished chat data')) unknown = list(cont.keys())[0] if unknown: - raise exceptions.ReceivedUnknownContinuation( - f"Received unknown continuation type:{unknown}") + self.raise_exception(exceptions.ReceivedUnknownContinuation( + f"Received unknown continuation type:{unknown}")) else: - raise exceptions.FailedExtractContinuation('Cannot extract continuation data') + self.raise_exception(exceptions.FailedExtractContinuation('Cannot extract continuation data')) return self._create_data(metadata, contents) def reload_continuation(self, contents): @@ -72,7 +83,7 @@ class Parser: """ if contents is None: '''Broadcasting end or cannot fetch chat stream''' - raise exceptions.NoContents('Chat data stream is empty.') + self.raise_exception(exceptions.NoContents('Chat data stream is empty.')) cont = contents['liveChatContinuation']['continuations'][0] if cont.get("liveChatReplayContinuationData"): # chat data exist. @@ -81,7 +92,7 @@ class Parser: init_cont = cont.get("playerSeekContinuationData") if init_cont: return init_cont.get("continuation") - raise exceptions.ChatDataFinished('Finished chat data') + self.raise_exception(exceptions.ChatDataFinished('Finished chat data')) def _create_data(self, metadata, contents): actions = contents['liveChatContinuation'].get('actions') @@ -103,3 +114,8 @@ class Parser: start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"]) last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"]) return (last - start) + + def raise_exception(self, exception): + if self.exception_holder is None: + raise exception + self.exception_holder = exception diff --git a/pytchat/processors/combinator.py b/pytchat/processors/combinator.py index 7784418..ff07da9 100644 --- a/pytchat/processors/combinator.py +++ b/pytchat/processors/combinator.py @@ -36,3 +36,7 @@ class Combinator(ChatProcessor): ''' return tuple(processor.process(chat_components) for processor in self.processors) + + def finalize(self, *args, **kwargs): + [processor.finalize(*args, **kwargs) + for processor in self.processors] diff --git a/pytchat/processors/default/custom_encoder.py b/pytchat/processors/default/custom_encoder.py new file mode 100644 index 0000000..1916d1b --- /dev/null +++ b/pytchat/processors/default/custom_encoder.py @@ -0,0 +1,11 @@ +import json +from .renderer.base import Author +from .renderer.paidmessage import Colors +from .renderer.paidsticker import Colors2 + + +class CustomEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, Author) or isinstance(obj, Colors) or isinstance(obj, Colors2): + return vars(obj) + return json.JSONEncoder.default(self, obj) diff --git a/pytchat/processors/default/processor.py b/pytchat/processors/default/processor.py index c4f8f47..d5e87a8 100644 --- a/pytchat/processors/default/processor.py +++ b/pytchat/processors/default/processor.py @@ -1,5 +1,7 @@ import asyncio +import json import time +from .custom_encoder import CustomEncoder from .renderer.textmessage import LiveChatTextMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer @@ -11,25 +13,120 @@ from ... import config logger = config.logger(__name__) +class Chat: + def json(self) -> str: + return json.dumps(vars(self), ensure_ascii=False, cls=CustomEncoder) + + class Chatdata: - def __init__(self, chatlist: list, timeout: float): + + def __init__(self, chatlist: list, timeout: float, abs_diff): self.items = chatlist self.interval = timeout + self.abs_diff = abs_diff + self.itemcount = 0 def tick(self): - if self.interval == 0: + '''DEPRECATE + Use sync_items() + ''' + if len(self.items) < 1: time.sleep(1) return - time.sleep(self.interval / len(self.items)) + if self.itemcount == 0: + self.starttime = time.time() + if len(self.items) == 1: + total_itemcount = 1 + else: + total_itemcount = len(self.items) - 1 + next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000 + tobe_disptime = self.abs_diff + next_chattime + wait_sec = tobe_disptime - time.time() + self.itemcount += 1 + + if wait_sec < 0: + wait_sec = 0 + + time.sleep(wait_sec) async def tick_async(self): - if self.interval == 0: + '''DEPRECATE + Use async_items() + ''' + if len(self.items) < 1: await asyncio.sleep(1) return - await asyncio.sleep(self.interval / len(self.items)) + if self.itemcount == 0: + self.starttime = time.time() + if len(self.items) == 1: + total_itemcount = 1 + else: + total_itemcount = len(self.items) - 1 + next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000 + tobe_disptime = self.abs_diff + next_chattime + wait_sec = tobe_disptime - time.time() + self.itemcount += 1 + + if wait_sec < 0: + wait_sec = 0 + + await asyncio.sleep(wait_sec) + + def sync_items(self): + starttime = time.time() + if len(self.items) > 0: + last_chattime = self.items[-1].timestamp / 1000 + tobe_disptime = self.abs_diff + last_chattime + wait_total_sec = max(tobe_disptime - time.time(), 0) + if len(self.items) > 1: + wait_sec = wait_total_sec / len(self.items) + elif len(self.items) == 1: + wait_sec = 0 + for c in self.items: + if wait_sec < 0: + wait_sec = 0 + time.sleep(wait_sec) + yield c + stop_interval = time.time() - starttime + if stop_interval < 1: + time.sleep(1 - stop_interval) + + async def async_items(self): + starttime = time.time() + if len(self.items) > 0: + last_chattime = self.items[-1].timestamp / 1000 + tobe_disptime = self.abs_diff + last_chattime + wait_total_sec = max(tobe_disptime - time.time(), 0) + if len(self.items) > 1: + wait_sec = wait_total_sec / len(self.items) + elif len(self.items) == 1: + wait_sec = 0 + for c in self.items: + if wait_sec < 0: + wait_sec = 0 + await asyncio.sleep(wait_sec) + yield c + + stop_interval = time.time() - starttime + if stop_interval < 1: + await asyncio.sleep(1 - stop_interval) + + def json(self) -> str: + return json.dumps([vars(a) for a in self.items], ensure_ascii=False, cls=CustomEncoder) class DefaultProcessor(ChatProcessor): + def __init__(self): + self.first = True + self.abs_diff = 0 + self.renderers = { + "liveChatTextMessageRenderer": LiveChatTextMessageRenderer(), + "liveChatPaidMessageRenderer": LiveChatPaidMessageRenderer(), + "liveChatPaidStickerRenderer": LiveChatPaidStickerRenderer(), + "liveChatLegacyPaidMessageRenderer": LiveChatLegacyPaidMessageRenderer(), + "liveChatMembershipItemRenderer": LiveChatMembershipItemRenderer() + } + def process(self, chat_components: list): chatlist = [] @@ -37,6 +134,8 @@ class DefaultProcessor(ChatProcessor): if chat_components: for component in chat_components: + if component is None: + continue timeout += component.get('timeout', 0) chatdata = component.get('chatdata') if chatdata is None: @@ -46,43 +145,35 @@ class DefaultProcessor(ChatProcessor): continue if action.get('addChatItemAction') is None: continue - if action['addChatItemAction'].get('item') is None: + item = action['addChatItemAction'].get('item') + if item is None: continue - - chat = self._parse(action) + chat = self._parse(item) if chat: chatlist.append(chat) - return Chatdata(chatlist, float(timeout)) + + if self.first and chatlist: + self.abs_diff = time.time() - chatlist[0].timestamp / 1000 + 2 + self.first = False - def _parse(self, sitem): - action = sitem.get("addChatItemAction") - if action: - item = action.get("item") - if item is None: - return None + chatdata = Chatdata(chatlist, float(timeout), self.abs_diff) + + return chatdata + + def _parse(self, item): try: - renderer = self._get_renderer(item) + key = list(item.keys())[0] + renderer = self.renderers.get(key) if renderer is None: return None - + renderer.setitem(item.get(key), Chat()) + renderer.settype() renderer.get_snippet() renderer.get_authordetails() + rendered_chatobj = renderer.get_chatobj() + renderer.clear() except (KeyError, TypeError) as e: - logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}") + logger.error(f"{str(type(e))}-{str(e)} item:{str(item)}") return None - return renderer - - def _get_renderer(self, item): - if item.get("liveChatTextMessageRenderer"): - renderer = LiveChatTextMessageRenderer(item) - elif item.get("liveChatPaidMessageRenderer"): - renderer = LiveChatPaidMessageRenderer(item) - elif item.get("liveChatPaidStickerRenderer"): - renderer = LiveChatPaidStickerRenderer(item) - elif item.get("liveChatLegacyPaidMessageRenderer"): - renderer = LiveChatLegacyPaidMessageRenderer(item) - elif item.get("liveChatMembershipItemRenderer"): - renderer = LiveChatMembershipItemRenderer(item) - else: - renderer = None - return renderer + + return rendered_chatobj diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py index 64fbecc..d6826c9 100644 --- a/pytchat/processors/default/renderer/base.py +++ b/pytchat/processors/default/renderer/base.py @@ -6,89 +6,96 @@ class Author: class BaseRenderer: - def __init__(self, item, chattype): - self.renderer = list(item.values())[0] - self.chattype = chattype - self.author = Author() + def setitem(self, item, chat): + self.item = item + self.chat = chat + self.chat.author = Author() + + def settype(self): + pass def get_snippet(self): - self.type = self.chattype - self.id = self.renderer.get('id') - timestampUsec = int(self.renderer.get("timestampUsec", 0)) - self.timestamp = int(timestampUsec / 1000) - tst = self.renderer.get("timestampText") + self.chat.id = self.item.get('id') + timestampUsec = int(self.item.get("timestampUsec", 0)) + self.chat.timestamp = int(timestampUsec / 1000) + tst = self.item.get("timestampText") if tst: - self.elapsedTime = tst.get("simpleText") + self.chat.elapsedTime = tst.get("simpleText") else: - self.elapsedTime = "" - self.datetime = self.get_datetime(timestampUsec) - self.message, self.messageEx = self.get_message(self.renderer) - self.id = self.renderer.get('id') - self.amountValue = 0.0 - self.amountString = "" - self.currency = "" - self.bgColor = 0 + self.chat.elapsedTime = "" + self.chat.datetime = self.get_datetime(timestampUsec) + self.chat.message, self.chat.messageEx = self.get_message(self.item) + self.chat.id = self.item.get('id') + self.chat.amountValue = 0.0 + self.chat.amountString = "" + self.chat.currency = "" + self.chat.bgColor = 0 def get_authordetails(self): - self.author.badgeUrl = "" - (self.author.isVerified, - self.author.isChatOwner, - self.author.isChatSponsor, - self.author.isChatModerator) = ( - self.get_badges(self.renderer) + self.chat.author.badgeUrl = "" + (self.chat.author.isVerified, + self.chat.author.isChatOwner, + self.chat.author.isChatSponsor, + self.chat.author.isChatModerator) = ( + self.get_badges(self.item) ) - self.author.channelId = self.renderer.get("authorExternalChannelId") - self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId - self.author.name = self.renderer["authorName"]["simpleText"] - self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"] + self.chat.author.channelId = self.item.get("authorExternalChannelId") + self.chat.author.channelUrl = "http://www.youtube.com/channel/" + self.chat.author.channelId + self.chat.author.name = self.item["authorName"]["simpleText"] + self.chat.author.imageUrl = self.item["authorPhoto"]["thumbnails"][1]["url"] - def get_message(self, renderer): + def get_message(self, item): message = '' message_ex = [] - if renderer.get("message"): - runs = renderer["message"].get("runs") - if runs: - for r in runs: - if r: - if r.get('emoji'): - message += r['emoji'].get('shortcuts', [''])[0] - message_ex.append({ - 'id': r['emoji'].get('emojiId').split('/')[-1], - 'txt': r['emoji'].get('shortcuts', [''])[0], - 'url': r['emoji']['image']['thumbnails'][0].get('url') - }) - else: - message += r.get('text', '') - message_ex.append(r.get('text', '')) + runs = item.get("message", {}).get("runs", {}) + for r in runs: + if not hasattr(r, "get"): + continue + if r.get('emoji'): + message += r['emoji'].get('shortcuts', [''])[0] + message_ex.append({ + 'id': r['emoji'].get('emojiId').split('/')[-1], + 'txt': r['emoji'].get('shortcuts', [''])[0], + 'url': r['emoji']['image']['thumbnails'][0].get('url') + }) + else: + message += r.get('text', '') + message_ex.append(r.get('text', '')) return message, message_ex def get_badges(self, renderer): - self.author.type = '' + self.chat.author.type = '' isVerified = False isChatOwner = False isChatSponsor = False isChatModerator = False - badges = renderer.get("authorBadges") - if badges: - for badge in badges: - if badge["liveChatAuthorBadgeRenderer"].get("icon"): - author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] - self.author.type = author_type - if author_type == 'VERIFIED': - isVerified = True - if author_type == 'OWNER': - isChatOwner = True - if author_type == 'MODERATOR': - isChatModerator = True - if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"): - isChatSponsor = True - self.author.type = 'MEMBER' - self.get_badgeurl(badge) + badges = renderer.get("authorBadges", {}) + for badge in badges: + if badge["liveChatAuthorBadgeRenderer"].get("icon"): + author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] + self.chat.author.type = author_type + if author_type == 'VERIFIED': + isVerified = True + if author_type == 'OWNER': + isChatOwner = True + if author_type == 'MODERATOR': + isChatModerator = True + if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"): + isChatSponsor = True + self.chat.author.type = 'MEMBER' + self.get_badgeurl(badge) return isVerified, isChatOwner, isChatSponsor, isChatModerator def get_badgeurl(self, badge): - self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] + self.chat.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] def get_datetime(self, timestamp): dt = datetime.fromtimestamp(timestamp / 1000000) return dt.strftime('%Y-%m-%d %H:%M:%S') + + def get_chatobj(self): + return self.chat + + def clear(self): + self.item = None + self.chat = None diff --git a/pytchat/processors/default/renderer/legacypaid.py b/pytchat/processors/default/renderer/legacypaid.py index dc6b2e4..f3b31c4 100644 --- a/pytchat/processors/default/renderer/legacypaid.py +++ b/pytchat/processors/default/renderer/legacypaid.py @@ -2,14 +2,14 @@ from .base import BaseRenderer class LiveChatLegacyPaidMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "newSponsor") + def settype(self): + self.chat.type = "newSponsor" def get_authordetails(self): super().get_authordetails() - self.author.isChatSponsor = True + self.chat.author.isChatSponsor = True - def get_message(self, renderer): - message = (renderer["eventText"]["runs"][0]["text"] - ) + ' / ' + (renderer["detailText"]["simpleText"]) + def get_message(self, item): + message = (item["eventText"]["runs"][0]["text"] + ) + ' / ' + (item["detailText"]["simpleText"]) return message, [message] diff --git a/pytchat/processors/default/renderer/membership.py b/pytchat/processors/default/renderer/membership.py index 7a7d100..f198abf 100644 --- a/pytchat/processors/default/renderer/membership.py +++ b/pytchat/processors/default/renderer/membership.py @@ -2,14 +2,17 @@ from .base import BaseRenderer class LiveChatMembershipItemRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "newSponsor") + def settype(self): + self.chat.type = "newSponsor" def get_authordetails(self): super().get_authordetails() - self.author.isChatSponsor = True + self.chat.author.isChatSponsor = True - def get_message(self, renderer): - message = ''.join([mes.get("text", "") - for mes in renderer["headerSubtext"]["runs"]]) + def get_message(self, item): + try: + message = ''.join([mes.get("text", "") + for mes in item["headerSubtext"]["runs"]]) + except KeyError: + return "Welcome New Member!", ["Welcome New Member!"] return message, [message] diff --git a/pytchat/processors/default/renderer/paidmessage.py b/pytchat/processors/default/renderer/paidmessage.py index 70bd055..19ae001 100644 --- a/pytchat/processors/default/renderer/paidmessage.py +++ b/pytchat/processors/default/renderer/paidmessage.py @@ -9,23 +9,23 @@ class Colors: class LiveChatPaidMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "superChat") + def settype(self): + self.chat.type = "superChat" def get_snippet(self): super().get_snippet() amountDisplayString, symbol, amount = ( - self.get_amountdata(self.renderer) + self.get_amountdata(self.item) ) - self.amountValue = amount - self.amountString = amountDisplayString - self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + self.chat.amountValue = amount + self.chat.amountString = amountDisplayString + self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( symbol) else symbol - self.bgColor = self.renderer.get("bodyBackgroundColor", 0) - self.colors = self.get_colors() + self.chat.bgColor = self.item.get("bodyBackgroundColor", 0) + self.chat.colors = self.get_colors() - def get_amountdata(self, renderer): - amountDisplayString = renderer["purchaseAmountText"]["simpleText"] + def get_amountdata(self, item): + amountDisplayString = item["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) @@ -36,11 +36,12 @@ class LiveChatPaidMessageRenderer(BaseRenderer): return amountDisplayString, symbol, amount def get_colors(self): + item = self.item colors = Colors() - colors.headerBackgroundColor = self.renderer.get("headerBackgroundColor", 0) - colors.headerTextColor = self.renderer.get("headerTextColor", 0) - colors.bodyBackgroundColor = self.renderer.get("bodyBackgroundColor", 0) - colors.bodyTextColor = self.renderer.get("bodyTextColor", 0) - colors.timestampColor = self.renderer.get("timestampColor", 0) - colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) + colors.headerBackgroundColor = item.get("headerBackgroundColor", 0) + colors.headerTextColor = item.get("headerTextColor", 0) + colors.bodyBackgroundColor = item.get("bodyBackgroundColor", 0) + colors.bodyTextColor = item.get("bodyTextColor", 0) + colors.timestampColor = item.get("timestampColor", 0) + colors.authorNameTextColor = item.get("authorNameTextColor", 0) return colors diff --git a/pytchat/processors/default/renderer/paidsticker.py b/pytchat/processors/default/renderer/paidsticker.py index 6a52a7e..cfa4fa1 100644 --- a/pytchat/processors/default/renderer/paidsticker.py +++ b/pytchat/processors/default/renderer/paidsticker.py @@ -4,30 +4,30 @@ from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") -class Colors: +class Colors2: pass class LiveChatPaidStickerRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "superSticker") + def settype(self): + self.chat.type = "superSticker" def get_snippet(self): super().get_snippet() amountDisplayString, symbol, amount = ( - self.get_amountdata(self.renderer) + self.get_amountdata(self.item) ) - self.amountValue = amount - self.amountString = amountDisplayString - self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + self.chat.amountValue = amount + self.chat.amountString = amountDisplayString + self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( symbol) else symbol - self.bgColor = self.renderer.get("backgroundColor", 0) - self.sticker = "".join(("https:", - self.renderer["sticker"]["thumbnails"][0]["url"])) - self.colors = self.get_colors() + self.chat.bgColor = self.item.get("backgroundColor", 0) + self.chat.sticker = "".join(("https:", + self.item["sticker"]["thumbnails"][0]["url"])) + self.chat.colors = self.get_colors() - def get_amountdata(self, renderer): - amountDisplayString = renderer["purchaseAmountText"]["simpleText"] + def get_amountdata(self, item): + amountDisplayString = item["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) @@ -38,9 +38,10 @@ class LiveChatPaidStickerRenderer(BaseRenderer): return amountDisplayString, symbol, amount def get_colors(self): - colors = Colors() - colors.moneyChipBackgroundColor = self.renderer.get("moneyChipBackgroundColor", 0) - colors.moneyChipTextColor = self.renderer.get("moneyChipTextColor", 0) - colors.backgroundColor = self.renderer.get("backgroundColor", 0) - colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) + item = self.item + colors = Colors2() + colors.moneyChipBackgroundColor = item.get("moneyChipBackgroundColor", 0) + colors.moneyChipTextColor = item.get("moneyChipTextColor", 0) + colors.backgroundColor = item.get("backgroundColor", 0) + colors.authorNameTextColor = item.get("authorNameTextColor", 0) return colors diff --git a/pytchat/processors/default/renderer/textmessage.py b/pytchat/processors/default/renderer/textmessage.py index 475a70d..608cfef 100644 --- a/pytchat/processors/default/renderer/textmessage.py +++ b/pytchat/processors/default/renderer/textmessage.py @@ -2,5 +2,5 @@ from .base import BaseRenderer class LiveChatTextMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "textMessage") + def settype(self): + self.chat.type = "textMessage" diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 95ada14..f969a3a 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -3,7 +3,7 @@ import os import re import time from base64 import standard_b64encode -from httpx import NetworkError, ReadTimeout +from concurrent.futures import ThreadPoolExecutor from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor from ..exceptions import UnknownConnectionError @@ -48,12 +48,14 @@ class HTMLArchiver(ChatProcessor): ''' def __init__(self, save_path, callback=None): super().__init__() + self.client = httpx.Client(http2=True) self.save_path = self._checkpath(save_path) self.processor = DefaultProcessor() self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary. self.header = [HEADER_HTML] self.body = ['\n', '
name
\n', self._parse_table_header(fmt_headers)] self.callback = callback + self.executor = ThreadPoolExecutor(max_workers=10) def _checkpath(self, filepath): splitter = os.path.splitext(os.path.basename(filepath)) @@ -80,7 +82,7 @@ class HTMLArchiver(ChatProcessor): save_path : str : Actual save path of file. total_lines : int : - count of total lines written to the file. + Count of total lines written to the file. """ if chat_components is None or len(chat_components) == 0: return @@ -118,9 +120,9 @@ class HTMLArchiver(ChatProcessor): err = None for _ in range(5): try: - resp = httpx.get(url, timeout=30) + resp = self.client.get(url, timeout=30) break - except (NetworkError, ReadTimeout) as e: + except httpx.HTTPError as e: print("Network Error. retrying...") err = e time.sleep(3) @@ -132,7 +134,7 @@ class HTMLArchiver(ChatProcessor): def _set_emoji_table(self, item: dict): emoji_id = item['id'] if emoji_id not in self.emoji_table: - self.emoji_table.setdefault(emoji_id, self._encode_img(item['url'])) + self.emoji_table.setdefault(emoji_id, self.executor.submit(self._encode_img, item['url'])) return emoji_id def _stylecode(self, name, code, width, height): @@ -143,11 +145,12 @@ class HTMLArchiver(ChatProcessor): def _create_styles(self): return '\n'.join(('\n')) def finalize(self): + self.executor.shutdown() self.header.extend([self._create_styles(), '\n']) self.body.extend(['
\n\n']) with open(self.save_path, mode='a', encoding='utf-8') as f: diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py index ab6fca3..a4c52e8 100644 --- a/pytchat/tool/extract/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -9,7 +9,6 @@ from ... import config from ... paramgen import arcparam from ... exceptions import UnknownConnectionError from concurrent.futures import CancelledError -from httpx import NetworkError, TimeoutException, ConnectError from json import JSONDecodeError from urllib.parse import quote @@ -81,7 +80,7 @@ def ready_blocks(video_id, duration, div, callback): break except JSONDecodeError: await asyncio.sleep(3) - except (NetworkError, TimeoutException, ConnectError) as e: + except httpx.HTTPError as e: err = e await asyncio.sleep(3) else: @@ -137,7 +136,7 @@ def fetch_patch(callback, blocks, video_id): break except JSONDecodeError: await asyncio.sleep(3) - except (NetworkError, TimeoutException, ConnectError) as e: + except httpx.HTTPError as e: err = e await asyncio.sleep(3) except socket.error as error: diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index 722a619..1744de7 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -2,15 +2,14 @@ import httpx import json import re import time -from httpx import ConnectError, NetworkError, TimeoutException from .. import config from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError from ..util.extract_video_id import extract_video_id headers = config.headers - pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})") +pattern2 = re.compile(r"yt\.setConfig\((\{[\s\S]*?\})\);") item_channel_id = [ "videoDetails", @@ -32,6 +31,10 @@ item_response = [ "embedded_player_response" ] +item_response2 = [ + "PLAYER_VARS", + "embedded_player_response" +] item_author_image = [ "videoDetails", "embeddedPlayerOverlayVideoDetailsRenderer", @@ -83,6 +86,8 @@ class VideoInfo: def __init__(self, video_id): self.video_id = extract_video_id(video_id) + self.client = httpx.Client(http2=True) + self.new_pattern_text = False err = None for _ in range(3): try: @@ -90,7 +95,6 @@ class VideoInfo: self._parse(text) break except (InvalidVideoIdException, UnknownConnectionError) as e: - print(str(e)) raise e except Exception as e: err = e @@ -104,10 +108,10 @@ class VideoInfo: err = None for _ in range(3): try: - resp = httpx.get(url, headers=headers) + resp = self.client.get(url, headers=headers) resp.raise_for_status() break - except (ConnectError, NetworkError, TimeoutException) as e: + except httpx.HTTPError as e: err = e time.sleep(3) else: @@ -118,12 +122,25 @@ class VideoInfo: def _parse(self, text): result = re.search(pattern, text) if result is None: - raise PatternUnmatchError(doc=text) + result = re.search(pattern2, text) + if result is None: + raise PatternUnmatchError(doc=text) + else: + self.new_pattern_text = True decoder = json.JSONDecoder() - res = decoder.raw_decode(result.group(1)[:-1])[0] - response = self._get_item(res, item_response) + if self.new_pattern_text: + res = decoder.raw_decode(result.group(1))[0] + else: + res = decoder.raw_decode(result.group(1)[:-1])[0] + if self.new_pattern_text: + response = self._get_item(res, item_response2) + else: + response = self._get_item(res, item_response) if response is None: - self._check_video_is_private(res.get("args")) + if self.new_pattern_text: + self._check_video_is_private(res.get("PLAYER_VARS")) + else: + self._check_video_is_private(res.get("args")) self._renderer = self._get_item(json.loads(response), item_renderer) if self._renderer is None: raise InvalidVideoIdException( diff --git a/pytchat/util/extract_video_id.py b/pytchat/util/extract_video_id.py index c62cd89..2f02ab0 100644 --- a/pytchat/util/extract_video_id.py +++ b/pytchat/util/extract_video_id.py @@ -17,12 +17,12 @@ def extract_video_id(url_or_id: str) -> str: return url_or_id match = re.search(PATTERN, url_or_id) if match is None: - raise InvalidVideoIdException(url_or_id) + raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") try: ret = match.group(4) except IndexError: - raise InvalidVideoIdException(url_or_id) + raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") if ret is None or len(ret) != YT_VIDEO_ID_LENGTH: - raise InvalidVideoIdException(url_or_id) + raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") return ret diff --git a/tests/test_default_processor.py b/tests/test_default_processor.py index 5ab0c34..7ed3ec3 100644 --- a/tests/test_default_processor.py +++ b/tests/test_default_processor.py @@ -17,7 +17,6 @@ def test_textmessage(mocker): } ret = processor.process([data]).items[0] - assert ret.chattype == "textMessage" assert ret.id == "dummy_id" assert ret.message == "dummy_message" assert ret.timestamp == 1570678496000 @@ -47,7 +46,6 @@ def test_textmessage_replay_member(mocker): } ret = processor.process([data]).items[0] - assert ret.chattype == "textMessage" assert ret.type == "textMessage" assert ret.id == "dummy_id" assert ret.message == "dummy_message" @@ -80,8 +78,6 @@ def test_superchat(mocker): } ret = processor.process([data]).items[0] - print(json.dumps(chatdata, ensure_ascii=False)) - assert ret.chattype == "superChat" assert ret.type == "superChat" assert ret.id == "dummy_id" assert ret.message == "dummy_message" @@ -124,8 +120,6 @@ def test_supersticker(mocker): } ret = processor.process([data]).items[0] - print(json.dumps(chatdata, ensure_ascii=False)) - assert ret.chattype == "superSticker" assert ret.type == "superSticker" assert ret.id == "dummy_id" assert ret.message == "" @@ -167,8 +161,6 @@ def test_sponsor(mocker): } ret = processor.process([data]).items[0] - print(json.dumps(chatdata, ensure_ascii=False)) - assert ret.chattype == "newSponsor" assert ret.type == "newSponsor" assert ret.id == "dummy_id" assert ret.message == "新規メンバー" @@ -202,8 +194,6 @@ def test_sponsor_legacy(mocker): } ret = processor.process([data]).items[0] - print(json.dumps(chatdata, ensure_ascii=False)) - assert ret.chattype == "newSponsor" assert ret.type == "newSponsor" assert ret.id == "dummy_id" assert ret.message == "新規メンバー / ようこそ、author_name!" diff --git a/tests/test_videoinfo.py b/tests/test_videoinfo.py index c2a8803..7189fef 100644 --- a/tests/test_videoinfo.py +++ b/tests/test_videoinfo.py @@ -1,6 +1,6 @@ from json.decoder import JSONDecodeError from pytchat.tool.videoinfo import VideoInfo -from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError +from pytchat.exceptions import InvalidVideoIdException def _open_file(path): @@ -13,7 +13,7 @@ def _set_test_data(filepath, mocker): response_mock = mocker.Mock() response_mock.status_code = 200 response_mock.text = _text - mocker.patch('httpx.get').return_value = response_mock + mocker.patch('httpx.Client.get').return_value = response_mock def test_archived_page(mocker): @@ -85,7 +85,7 @@ def test_pattern_unmatch(mocker): try: _ = VideoInfo('__test_id__') assert False - except PatternUnmatchError: + except JSONDecodeError: assert True