diff --git a/README.md b/README.md
index 9c2267b..2d28adb 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,7 @@ pytchat is a python library for fetching youtube live chat.
## Description
pytchat is a python library for fetching youtube live chat
-without using youtube api, Selenium or BeautifulSoup.
-
-pytchatは、YouTubeチャットを閲覧するためのpythonライブラリです。
+without using Selenium or BeautifulSoup.
Other features:
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
@@ -16,7 +14,7 @@ Other features:
instead of web scraping.
For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki).
-より詳細な解説は[wiki](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)を参照してください。
+[wiki (Japanese)](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)
## Install
```python
@@ -27,128 +25,55 @@ pip install pytchat
### CLI
One-liner command.
-Save chat data to html, with embedded custom emojis.
+Save chat data to html with embedded custom emojis.
+Show chat stream (--echo option).
```bash
-$ pytchat -v https://www.youtube.com/watch?v=ZJ6Q4U_Vg6s -o "c:/temp/"
+$ pytchat -v https://www.youtube.com/watch?v=uIx8l2xlYVY -o "c:/temp/"
# options:
# -v : Video ID or URL that includes ID
# -o : output directory (default path: './')
+# --echo : Show chats.
# saved filename is [video_id].html
```
-### on-demand mode
+### On-demand mode with simple non-buffered object.
```python
-from pytchat import LiveChat
-livechat = LiveChat(video_id = "Zvp1pJpie4I")
-# It is also possible to specify a URL that includes the video ID:
-# livechat = LiveChat("https://www.youtube.com/watch?v=Zvp1pJpie4I")
-while livechat.is_alive():
- try:
- chatdata = livechat.get()
- for c in chatdata.items:
- print(f"{c.datetime} [{c.author.name}]- {c.message}")
- chatdata.tick()
- except KeyboardInterrupt:
- livechat.terminate()
- break
-```
-
-### callback mode
-```python
-from pytchat import LiveChat
-import time
-
-def main():
- livechat = LiveChat(video_id = "Zvp1pJpie4I", callback = disp)
- while livechat.is_alive():
- #other background operation.
- time.sleep(1)
- livechat.terminate()
-
-#callback function (automatically called)
-def disp(chatdata):
- for c in chatdata.items:
- print(f"{c.datetime} [{c.author.name}]- {c.message}")
- chatdata.tick()
-
-if __name__ == '__main__':
- main()
-
-```
-
-### asyncio context:
-```python
-from pytchat import LiveChatAsync
-from concurrent.futures import CancelledError
-import asyncio
-
-async def main():
- livechat = LiveChatAsync("Zvp1pJpie4I", callback = func)
- while livechat.is_alive():
- #other background operation.
- await asyncio.sleep(3)
-
-#callback function is automatically called.
-async def func(chatdata):
- for c in chatdata.items:
- print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
- await chatdata.tick_async()
-
-if __name__ == '__main__':
- try:
- loop = asyncio.get_event_loop()
- loop.run_until_complete(main())
- except CancelledError:
- pass
-```
-
-
-### youtube api compatible processor:
-```python
-from pytchat import LiveChat, CompatibleProcessor
-import time
-
-chat = LiveChat("Zvp1pJpie4I",
- processor = CompatibleProcessor() )
-
+import pytchat
+chat = pytchat.create(video_id="uIx8l2xlYVY")
while chat.is_alive():
- try:
- data = chat.get()
- polling = data['pollingIntervalMillis']/1000
- for c in data['items']:
- if c.get('snippet'):
- print(f"[{c['authorDetails']['displayName']}]"
- f"-{c['snippet']['displayMessage']}")
- time.sleep(polling/len(data['items']))
- except KeyboardInterrupt:
- chat.terminate()
+ for c in chat.get().sync_items():
+ print(f"{c.datetime} [{c.author.name}]- {c.message}")
```
-### replay:
-If specified video is not live,
-automatically try to fetch archived chat data.
+### Output JSON format (feature of [DefaultProcessor](DefaultProcessor))
```python
-from pytchat import LiveChat
+import pytchat
+import time
-def main():
- #seektime (seconds): start position of chat.
- chat = LiveChat("ojes5ULOqhc", seektime = 60*30)
- print('Replay from 30:00')
- try:
- while chat.is_alive():
- data = chat.get()
- for c in data.items:
- print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}")
- data.tick()
- except KeyboardInterrupt:
- chat.terminate()
-
-if __name__ == '__main__':
- main()
+chat = pytchat.create(video_id="uIx8l2xlYVY")
+while chat.is_alive():
+ print(chat.get().json())
+ time.sleep(5)
+ '''
+ # Each chat item can also be output in JSON format.
+ for c in chat.get().sync_items():
+ print(c.json())
+ '''
```
-### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
+
+
+### other
+#### Fetch chat with buffer.
+[LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat)
+
+#### Asyncio Context
+[LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync)
+
+#### [YT API compatible chat processor]https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor)
+
+### [Extract archived chat data](https://github.com/taizan-hokuto/pytchat/wiki/Extractor)
```python
from pytchat import HTMLArchiver, Extractor
@@ -164,7 +89,7 @@ print("finished.")
```
## Structure of Default Processor
-Each item can be got with `items` function.
+Each item can be got with `sync_items()` function.
| name | @@ -298,6 +223,9 @@ Most of source code of CLI refer to: [PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader) +Progress bar in CLI is based on: + +[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3) ## Author diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 3343b8c..ebd5725 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,13 +2,28 @@ pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.3.2' +__version__ = '0.4.0' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __url__ = 'https://github.com/taizan-hokuto/pytchat' -__all__ = ["core_async","core_multithread","processors"] + +from .exceptions import ( + ChatParseException, + ResponseContextError, + NoContents, + NoContinuation, + IllegalFunctionCall, + InvalidVideoIdException, + UnknownConnectionError, + RetryExceedMaxCount, + ChatDataFinished, + ReceivedUnknownContinuation, + FailedExtractContinuation, + VideoInfoParseError, + PatternUnmatchError +) from .api import ( cli, @@ -26,7 +41,7 @@ from .api import ( SimpleDisplayProcessor, SpeedCalculator, SuperchatCalculator, - VideoInfo + VideoInfo, + create ) - # flake8: noqa \ No newline at end of file diff --git a/pytchat/api.py b/pytchat/api.py index 7c67436..bf64e07 100644 --- a/pytchat/api.py +++ b/pytchat/api.py @@ -1,5 +1,6 @@ from . import cli from . import config +from .core import create from .core_multithread.livechat import LiveChat from .core_async.livechat import LiveChatAsync from .processors.chat_processor import ChatProcessor @@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator from .tool.extract.extractor import Extractor from .tool.videoinfo import VideoInfo +__all__ = [ + cli, + config, + LiveChat, + LiveChatAsync, + ChatProcessor, + CompatibleProcessor, + DummyProcessor, + DefaultProcessor, + Extractor, + HTMLArchiver, + TSVArchiver, + JsonfileArchiver, + SimpleDisplayProcessor, + SpeedCalculator, + SuperchatCalculator, + VideoInfo, + create +] + # flake8: noqa \ No newline at end of file diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index f8cffd3..0d28485 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError from pathlib import Path from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError from .arguments import Arguments +from .echo import Echo from .progressbar import ProgressBar from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError from .. processors.html_archiver import HTMLArchiver @@ -41,11 +42,13 @@ def main(): help='Save error data when error occurs(".dat" file)') parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', help='Show version') + parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true', + help='Show chats of specified video') Arguments(parser.parse_args().__dict__) if Arguments().print_version: - print(f'pytchat v{__version__} © 2019 taizan-hokuto') + print(f'pytchat v{__version__} © 2019,2020 taizan-hokuto') return # Extractor @@ -53,6 +56,20 @@ def main(): parser.print_help() return + # Echo + if Arguments().echo: + if len(Arguments().video_ids) > 1: + print("You can specify only one video ID.") + return + try: + Echo(Arguments().video_ids[0]).run() + except InvalidVideoIdException as e: + print("Invalid video id:", str(e)) + except Exception as e: + print(type(e), str(e)) + finally: + return + if not os.path.exists(Arguments().output): print("\nThe specified directory does not exist.:{}\n".format(Arguments().output)) return @@ -77,10 +94,13 @@ class Runner: path = util.checkpath(separated_path + video_id + '.html') try: info = VideoInfo(video_id) - except Exception as e: + except (PatternUnmatchError, JSONDecodeError) as e: print("Cannot parse video information.:{} {}".format(video_id, type(e))) if Arguments().save_error_data: - util.save(str(e), "ERR", ".dat") + util.save(str(e.doc), "ERR", ".dat") + continue + except Exception as e: + print("Cannot parse video information.:{} {}".format(video_id, type(e))) continue print(f"\n" diff --git a/pytchat/cli/arguments.py b/pytchat/cli/arguments.py index 0e1baf4..be720c8 100644 --- a/pytchat/cli/arguments.py +++ b/pytchat/cli/arguments.py @@ -19,6 +19,7 @@ class Arguments(metaclass=Singleton): OUTPUT: str = 'output_dir' VIDEO_IDS: str = 'video_id' SAVE_ERROR_DATA: bool = 'save_error_data' + ECHO: bool = 'echo' def __init__(self, arguments: Optional[Dict[str, Union[str, bool, int]]] = None): @@ -36,8 +37,9 @@ class Arguments(metaclass=Singleton): self.output: str = arguments[Arguments.Name.OUTPUT] self.video_ids: List[int] = [] self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA] - + self.echo: bool = arguments[Arguments.Name.ECHO] # Videos + if arguments[Arguments.Name.VIDEO_IDS]: self.video_ids = [video_id for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')] diff --git a/pytchat/cli/echo.py b/pytchat/cli/echo.py new file mode 100644 index 0000000..95876ab --- /dev/null +++ b/pytchat/cli/echo.py @@ -0,0 +1,22 @@ +import pytchat +from ..exceptions import ChatDataFinished, NoContents +from ..util.extract_video_id import extract_video_id + + +class Echo: + def __init__(self, video_id): + self.video_id = extract_video_id(video_id) + + def run(self): + livechat = pytchat.create(self.video_id) + while livechat.is_alive(): + chatdata = livechat.get() + for c in chatdata.sync_items(): + print(f"{c.datetime} [{c.author.name}] {c.message} {c.amountString}") + + try: + livechat.raise_for_status() + except (ChatDataFinished, NoContents): + print("Chat finished.") + except Exception as e: + print(type(e), str(e)) diff --git a/pytchat/cli/progressbar.py b/pytchat/cli/progressbar.py index fe37591..ae6174d 100644 --- a/pytchat/cli/progressbar.py +++ b/pytchat/cli/progressbar.py @@ -1,5 +1,5 @@ ''' -This code for this progress bar is based on +This code is based on vladignatyev/progress.py https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 (MIT License) diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index 3a86a8a..e362819 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -1,4 +1,4 @@ -import logging +import logging # noqa from . import mylogger headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36', diff --git a/pytchat/core/__init__.py b/pytchat/core/__init__.py new file mode 100644 index 0000000..9b98c18 --- /dev/null +++ b/pytchat/core/__init__.py @@ -0,0 +1,7 @@ +from .pytchat import PytchatCore +from .. util.extract_video_id import extract_video_id + + +def create(video_id: str, **kwargs): + _vid = extract_video_id(video_id) + return PytchatCore(_vid, **kwargs) diff --git a/pytchat/core/pytchat.py b/pytchat/core/pytchat.py new file mode 100644 index 0000000..f3a8172 --- /dev/null +++ b/pytchat/core/pytchat.py @@ -0,0 +1,207 @@ +import httpx +import json +import signal +import time +import traceback +import urllib.parse +from ..parser.live import Parser +from .. import config +from .. import exceptions +from ..paramgen import liveparam, arcparam +from ..processors.default.processor import DefaultProcessor +from ..processors.combinator import Combinator +from ..util.extract_video_id import extract_video_id + +headers = config.headers +MAX_RETRY = 10 + + +class PytchatCore: + ''' + + Parameter + --------- + video_id : str + + seektime : int + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. + + processor : ChatProcessor + + interruptable : bool + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. + + force_replay : bool + force to fetch archived chat data, even if specified video is live. + + topchat_only : bool + If True, get only top chat. + + hold_exception : bool [default:True] + If True, when exceptions occur, the exception is held internally, + and can be raised by raise_for_status(). + + Attributes + --------- + _is_alive : bool + Flag to stop getting chat. + ''' + + _setup_finished = False + + def __init__(self, video_id, + seektime=-1, + processor=DefaultProcessor(), + interruptable=True, + force_replay=False, + topchat_only=False, + hold_exception=True, + logger=config.logger(__name__), + ): + self._video_id = extract_video_id(video_id) + self.seektime = seektime + if isinstance(processor, tuple): + self.processor = Combinator(processor) + else: + self.processor = processor + self._is_alive = True + self._is_replay = force_replay + self._hold_exception = hold_exception + self._exception_holder = None + self._parser = Parser( + is_replay=self._is_replay, + exception_holder=self._exception_holder + ) + self._first_fetch = True + self._fetch_url = "live_chat/get_live_chat?continuation=" + self._topchat_only = topchat_only + self._logger = logger + if interruptable: + signal.signal(signal.SIGINT, lambda a, b: self.terminate()) + self._setup() + + def _setup(self): + time.sleep(0.1) # sleep shortly to prohibit skipping fetching data + """Fetch first continuation parameter, + create and start _listen loop. + """ + self.continuation = liveparam.getparam(self._video_id, 3) + + def _get_chat_component(self): + + ''' Fetch chat data and store them into buffer, + get next continuaiton parameter and loop. + + Parameter + --------- + continuation : str + parameter for next chat data + ''' + try: + with httpx.Client(http2=True) as client: + if self.continuation and self._is_alive: + contents = self._get_contents(self.continuation, client, headers) + metadata, chatdata = self._parser.parse(contents) + timeout = metadata['timeoutMs'] / 1000 + chat_component = { + "video_id": self._video_id, + "timeout": timeout, + "chatdata": chatdata + } + self.continuation = metadata.get('continuation') + return chat_component + except exceptions.ChatParseException as e: + self._logger.debug(f"[{self._video_id}]{str(e)}") + self._raise_exception(e) + except (TypeError, json.JSONDecodeError) as e: + self._logger.error(f"{traceback.format_exc(limit=-1)}") + self._raise_exception(e) + + self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._raise_exception(exceptions.ChatDataFinished) + + def _get_contents(self, continuation, client, headers): + '''Get 'continuationContents' from livechat json. + If contents is None at first fetching, + try to fetch archive chat data. + + Return: + ------- + 'continuationContents' which includes metadata & chat data. + ''' + livechat_json = ( + self._get_livechat_json(continuation, client, headers) + ) + contents = self._parser.get_contents(livechat_json) + if self._first_fetch: + if contents is None or self._is_replay: + '''Try to fetch archive chat data.''' + self._parser.is_replay = True + self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" + continuation = arcparam.getparam( + self._video_id, self.seektime, self._topchat_only) + livechat_json = (self._get_livechat_json(continuation, client, headers)) + reload_continuation = self._parser.reload_continuation( + self._parser.get_contents(livechat_json)) + if reload_continuation: + livechat_json = (self._get_livechat_json( + reload_continuation, client, headers)) + contents = self._parser.get_contents(livechat_json) + self._is_replay = True + self._first_fetch = False + return contents + + def _get_livechat_json(self, continuation, client, headers): + ''' + Get json which includes chat data. + ''' + continuation = urllib.parse.quote(continuation) + livechat_json = None + err = None + url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" + for _ in range(MAX_RETRY + 1): + with client: + try: + livechat_json = client.get(url, headers=headers).json() + break + except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e: + err = e + time.sleep(2) + continue + else: + self._logger.error(f"[{self._video_id}]" + f"Exceeded retry count. Last error: {str(err)}") + self._raise_exception(exceptions.RetryExceedMaxCount()) + return livechat_json + + def get(self): + if self.is_alive(): + chat_component = self._get_chat_component() + return self.processor.process([chat_component]) + else: + return [] + + def is_replay(self): + return self._is_replay + + def is_alive(self): + return self._is_alive + + def terminate(self): + self._is_alive = False + self.processor.finalize() + + def raise_for_status(self): + if self._exception_holder is not None: + raise self._exception_holder + + def _raise_exception(self, exception: Exception = None): + self._is_alive = False + if self._hold_exception is False: + raise exception + self._exception_holder = exception diff --git a/pytchat/core_async/buffer.py b/pytchat/core_async/buffer.py index 9fbaac9..e985088 100644 --- a/pytchat/core_async/buffer.py +++ b/pytchat/core_async/buffer.py @@ -4,13 +4,13 @@ import asyncio class Buffer(asyncio.Queue): ''' - チャットデータを格納するバッファの役割を持つFIFOキュー + Buffer for storing chat data. Parameter --------- maxsize : int - 格納するチャットブロックの最大個数。0の場合は無限。 - 最大値を超える場合は古いチャットブロックから破棄される。 + Maximum number of chat blocks to be stored. + If it exceeds the maximum, the oldest chat block will be discarded. ''' def __init__(self, maxsize=0): diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 9ec5512..5dccb4c 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -22,54 +22,51 @@ MAX_RETRY = 10 class LiveChatAsync: - '''asyncioを利用してYouTubeのライブ配信のチャットデータを取得する。 + '''LiveChatAsync object fetches chat data and stores them + in a buffer with asyncio. Parameter --------- video_id : str - 動画ID seektime : int - (ライブチャット取得時は無視) - 取得開始するアーカイブ済みチャットの経過時間(秒) - マイナス値を指定した場合は、配信開始前のチャットも取得する。 + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. processor : ChatProcessor - チャットデータを加工するオブジェクト - buffer : Buffer(maxsize:20[default]) - チャットデータchat_componentを格納するバッファ。 - maxsize : 格納できるchat_componentの個数 - default値20個。1個で約5~10秒分。 + buffer : Buffer + buffer of chat data fetched background. interruptable : bool - Ctrl+Cによる処理中断を行うかどうか。 + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. callback : func - _listen()関数から一定間隔で自動的に呼びだす関数。 + function called periodically from _listen(). done_callback : func - listener終了時に呼び出すコールバック。 + function called when listener ends. exception_handler : func - 例外を処理する関数 direct_mode : bool - Trueの場合、bufferを使わずにcallbackを呼ぶ。 - Trueの場合、callbackの設定が必須 - (設定していない場合IllegalFunctionCall例外を発生させる) + If True, invoke specified callback function without using buffer. + callback is required. If not, IllegalFunctionCall will be raised. force_replay : bool - Trueの場合、ライブチャットが取得できる場合であっても - 強制的にアーカイブ済みチャットを取得する。 + force to fetch archived chat data, even if specified video is live. topchat_only : bool - Trueの場合、上位チャットのみ取得する。 + If True, get only top chat. Attributes --------- _is_alive : bool - チャット取得を停止するためのフラグ + Flag to stop getting chat. ''' _setup_finished = False @@ -114,31 +111,30 @@ class LiveChatAsync: self._set_exception_handler(exception_handler) if interruptable: signal.signal(signal.SIGINT, - (lambda a, b: asyncio.create_task( - LiveChatAsync.shutdown(None, signal.SIGINT, b)))) + (lambda a, b: self._keyboard_interrupt())) self._setup() def _setup(self): - # direct modeがTrueでcallback未設定の場合例外発生。 + # An exception is raised when direct mode is true and no callback is set. if self._direct_mode: if self._callback is None: raise exceptions.IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: - # direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 + # Create a default buffer if `direct_mode` is False and buffer is not set. if self._buffer is None: self._buffer = Buffer(maxsize=20) - # callbackが指定されている場合はcallbackを呼ぶループタスクを作成 + # Create a loop task to call callback if the `callback` param is specified. if self._callback is None: pass else: - # callbackを呼ぶループタスクの開始 + # Create a loop task to call callback if the `callback` param is specified. loop = asyncio.get_event_loop() loop.create_task(self._callback_loop(self._callback)) - # _listenループタスクの開始 + # Start a loop task for _listen() loop = asyncio.get_event_loop() self.listen_task = loop.create_task(self._startlisten()) - # add_done_callbackの登録 + # Register add_done_callback if self._done_callback is None: self.listen_task.add_done_callback(self._finish) else: @@ -194,7 +190,7 @@ class LiveChatAsync: self._logger.error(f"{traceback.format_exc(limit = -1)}") raise - self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._logger.debug(f"[{self._video_id}] finished fetching chat.") raise exceptions.ChatDataFinished async def _check_pause(self, continuation): @@ -246,30 +242,30 @@ class LiveChatAsync: ''' continuation = urllib.parse.quote(continuation) livechat_json = None - status_code = 0 url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" for _ in range(MAX_RETRY + 1): try: resp = await client.get(url, headers=headers) livechat_json = resp.json() break - except (httpx.HTTPError, json.JSONDecodeError): + except (json.JSONDecodeError, httpx.HTTPError): await asyncio.sleep(1) continue else: self._logger.error(f"[{self._video_id}]" - f"Exceeded retry count. status_code={status_code}") + f"Exceeded retry count.") return None return livechat_json async def _callback_loop(self, callback): - """ コンストラクタでcallbackを指定している場合、バックグラウンドで - callbackに指定された関数に一定間隔でチャットデータを投げる。 + """ If a callback is specified in the constructor, + it throws chat data at regular intervals to the + function specified in the callback in the backgroun Parameter --------- callback : func - 加工済みのチャットデータを渡す先の関数。 + function to which the processed chat data is passed. """ while self.is_alive(): items = await self._buffer.get() @@ -280,11 +276,13 @@ class LiveChatAsync: await self._callback(processed_chat) async def get(self): - """ bufferからデータを取り出し、processorに投げ、 - 加工済みのチャットデータを返す。 + """ + Retrieves data from the buffer, + throws it to the processor, + and returns the processed chat data. Returns - : Processorによって加工されたチャットデータ + : Chat data processed by the Processor """ if self._callback is None: if self.is_alive(): @@ -293,7 +291,7 @@ class LiveChatAsync: else: return [] raise exceptions.IllegalFunctionCall( - "既にcallbackを登録済みのため、get()は実行できません。") + "Callback parameter is already set, so get() cannot be performed.") def is_replay(self): return self._is_replay @@ -314,11 +312,11 @@ class LiveChatAsync: return self._is_alive def _finish(self, sender): - '''Listener終了時のコールバック''' + '''Called when the _listen() task finished.''' try: self._task_finished() except CancelledError: - self._logger.debug(f'[{self._video_id}]cancelled:{sender}') + self._logger.debug(f'[{self._video_id}] cancelled:{sender}') def terminate(self): if self._pauser.empty(): @@ -326,10 +324,14 @@ class LiveChatAsync: self._is_alive = False self._buffer.put_nowait({}) self.processor.finalize() - + + def _keyboard_interrupt(self): + self.exception = exceptions.ChatDataFinished() + self.terminate() + def _task_finished(self): ''' - Listenerを終了する。 + Terminate fetching chats. ''' if self.is_alive(): self.terminate() @@ -339,7 +341,7 @@ class LiveChatAsync: self.exception = e if not isinstance(e, exceptions.ChatParseException): self._logger.error(f'Internal exception - {type(e)}{str(e)}') - self._logger.info(f'[{self._video_id}]終了しました') + self._logger.info(f'[{self._video_id}] finished.') def raise_for_status(self): if self.exception is not None: @@ -349,15 +351,3 @@ class LiveChatAsync: def _set_exception_handler(cls, handler): loop = asyncio.get_event_loop() loop.set_exception_handler(handler) - - @classmethod - async def shutdown(cls, event, sig=None, handler=None): - cls._logger.debug("shutdown...") - tasks = [t for t in asyncio.all_tasks() if t is not - asyncio.current_task()] - [task.cancel() for task in tasks] - - cls._logger.debug("complete remaining tasks...") - await asyncio.gather(*tasks, return_exceptions=True) - loop = asyncio.get_event_loop() - loop.stop() diff --git a/pytchat/core_multithread/buffer.py b/pytchat/core_multithread/buffer.py index 966f2e9..8ead646 100644 --- a/pytchat/core_multithread/buffer.py +++ b/pytchat/core_multithread/buffer.py @@ -4,13 +4,13 @@ import queue class Buffer(queue.Queue): ''' - チャットデータを格納するバッファの役割を持つFIFOキュー + Buffer for storing chat data. Parameter --------- - max_size : int - 格納するチャットブロックの最大個数。0の場合は無限。 - 最大値を超える場合は古いチャットブロックから破棄される。 + maxsize : int + Maximum number of chat blocks to be stored. + If it exceeds the maximum, the oldest chat block will be discarded. ''' def __init__(self, maxsize=0): diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index f096d3f..8e025c2 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -21,54 +21,53 @@ MAX_RETRY = 10 class LiveChat: - ''' スレッドプールを利用してYouTubeのライブ配信のチャットデータを取得する + ''' + LiveChat object fetches chat data and stores them + in a buffer with ThreadpoolExecutor. Parameter --------- video_id : str - 動画ID seektime : int - (ライブチャット取得時は無視) - 取得開始するアーカイブ済みチャットの経過時間(秒) - マイナス値を指定した場合は、配信開始前のチャットも取得する。 + start position of fetching chat (seconds). + This option is valid for archived chat only. + If negative value, chat data posted before the start of the broadcast + will be retrieved as well. processor : ChatProcessor - チャットデータを加工するオブジェクト - buffer : Buffer(maxsize:20[default]) - チャットデータchat_componentを格納するバッファ。 - maxsize : 格納できるchat_componentの個数 - default値20個。1個で約5~10秒分。 + buffer : Buffer + buffer of chat data fetched background. interruptable : bool - Ctrl+Cによる処理中断を行うかどうか。 + Allows keyboard interrupts. + Set this parameter to False if your own threading program causes + the problem. callback : func - _listen()関数から一定間隔で自動的に呼びだす関数。 + function called periodically from _listen(). done_callback : func - listener終了時に呼び出すコールバック。 + function called when listener ends. direct_mode : bool - Trueの場合、bufferを使わずにcallbackを呼ぶ。 - Trueの場合、callbackの設定が必須 - (設定していない場合IllegalFunctionCall例外を発生させる) + If True, invoke specified callback function without using buffer. + callback is required. If not, IllegalFunctionCall will be raised. force_replay : bool - Trueの場合、ライブチャットが取得できる場合であっても - 強制的にアーカイブ済みチャットを取得する。 + force to fetch archived chat data, even if specified video is live. topchat_only : bool - Trueの場合、上位チャットのみ取得する。 + If True, get only top chat. Attributes --------- _executor : ThreadPoolExecutor - チャットデータ取得ループ(_listen)用のスレッド + This is used for _listen() loop. _is_alive : bool - チャット取得を停止するためのフラグ + Flag to stop getting chat. ''' _setup_finished = False @@ -112,24 +111,24 @@ class LiveChat: self._setup() def _setup(self): - # direct modeがTrueでcallback未設定の場合例外発生。 + # An exception is raised when direct mode is true and no callback is set. if self._direct_mode: if self._callback is None: raise exceptions.IllegalFunctionCall( "When direct_mode=True, callback parameter is required.") else: - # direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 + # Create a default buffer if `direct_mode` is False and buffer is not set. if self._buffer is None: self._buffer = Buffer(maxsize=20) - # callbackが指定されている場合はcallbackを呼ぶループタスクを作成 + # Create a loop task to call callback if the `callback` param is specified. if self._callback is None: pass else: - # callbackを呼ぶループタスクの開始 + # Start a loop task calling callback function. self._executor.submit(self._callback_loop, self._callback) - # _listenループタスクの開始 + # Start a loop task for _listen() self.listen_task = self._executor.submit(self._startlisten) - # add_done_callbackの登録 + # Register add_done_callback if self._done_callback is None: self.listen_task.add_done_callback(self._finish) else: @@ -184,7 +183,7 @@ class LiveChat: self._logger.error(f"{traceback.format_exc(limit=-1)}") raise - self._logger.debug(f"[{self._video_id}]finished fetching chat.") + self._logger.debug(f"[{self._video_id}] finished fetching chat.") raise exceptions.ChatDataFinished def _check_pause(self, continuation): @@ -236,30 +235,30 @@ class LiveChat: ''' continuation = urllib.parse.quote(continuation) livechat_json = None - status_code = 0 url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" for _ in range(MAX_RETRY + 1): with client: try: livechat_json = client.get(url, headers=headers).json() break - except json.JSONDecodeError: - time.sleep(1) + except (json.JSONDecodeError, httpx.HTTPError): + time.sleep(2) continue else: self._logger.error(f"[{self._video_id}]" - f"Exceeded retry count. status_code={status_code}") + f"Exceeded retry count.") raise exceptions.RetryExceedMaxCount() return livechat_json def _callback_loop(self, callback): - """ コンストラクタでcallbackを指定している場合、バックグラウンドで - callbackに指定された関数に一定間隔でチャットデータを投げる。 + """ If a callback is specified in the constructor, + it throws chat data at regular intervals to the + function specified in the callback in the backgroun Parameter --------- callback : func - 加工済みのチャットデータを渡す先の関数。 + function to which the processed chat data is passed. """ while self.is_alive(): items = self._buffer.get() @@ -270,11 +269,13 @@ class LiveChat: self._callback(processed_chat) def get(self): - """ bufferからデータを取り出し、processorに投げ、 - 加工済みのチャットデータを返す。 + """ + Retrieves data from the buffer, + throws it to the processor, + and returns the processed chat data. Returns - : Processorによって加工されたチャットデータ + : Chat data processed by the Processor """ if self._callback is None: if self.is_alive(): @@ -283,7 +284,7 @@ class LiveChat: else: return [] raise exceptions.IllegalFunctionCall( - "既にcallbackを登録済みのため、get()は実行できません。") + "Callback parameter is already set, so get() cannot be performed.") def is_replay(self): return self._is_replay @@ -304,13 +305,16 @@ class LiveChat: return self._is_alive def _finish(self, sender): - '''Listener終了時のコールバック''' + '''Called when the _listen() task finished.''' try: self._task_finished() except CancelledError: - self._logger.debug(f'[{self._video_id}]cancelled:{sender}') + self._logger.debug(f'[{self._video_id}] cancelled:{sender}') def terminate(self): + ''' + Terminate fetching chats. + ''' if self._pauser.empty(): self._pauser.put_nowait(None) self._is_alive = False @@ -319,9 +323,6 @@ class LiveChat: self.processor.finalize() def _task_finished(self): - ''' - Listenerを終了する。 - ''' if self.is_alive(): self.terminate() try: @@ -330,7 +331,7 @@ class LiveChat: self.exception = e if not isinstance(e, exceptions.ChatParseException): self._logger.error(f'Internal exception - {type(e)}{str(e)}') - self._logger.info(f'[{self._video_id}]終了しました') + self._logger.info(f'[{self._video_id}] finished.') def raise_for_status(self): if self.exception is not None: diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index 1bd918c..4212fdc 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -76,6 +76,6 @@ class PatternUnmatchError(VideoInfoParseError): ''' Thrown when failed to parse video info with unmatched pattern. ''' - def __init__(self, doc): + def __init__(self, doc=''): self.msg = "PatternUnmatchError" self.doc = doc diff --git a/pytchat/paramgen/arcparam_mining.py b/pytchat/paramgen/arcparam_mining.py deleted file mode 100644 index 7e3525a..0000000 --- a/pytchat/paramgen/arcparam_mining.py +++ /dev/null @@ -1,133 +0,0 @@ -from base64 import urlsafe_b64encode as b64enc -from functools import reduce -import urllib.parse - -''' -Generate continuation parameter of youtube replay chat. - -Author: taizan-hokuto (2019) @taizan205 - -ver 0.0.1 2019.10.05 -''' - - -def _gen_vid_long(video_id): - """generate video_id parameter. - Parameter - --------- - video_id : str - - Return - --------- - byte[] : base64 encoded video_id parameter. - """ - header_magic = b'\x0A\x0F\x1A\x0D\x0A' - header_id = video_id.encode() - header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B' - header_terminator = b'\x20\x01' - - item = [ - header_magic, - _nval(len(header_id)), - header_id, - header_sep_1, - header_id, - header_terminator - ] - - return urllib.parse.quote( - b64enc(reduce(lambda x, y: x + y, item)).decode() - ).encode() - - -def _gen_vid(video_id): - """generate video_id parameter. - Parameter - --------- - video_id : str - - Return - --------- - bytes : base64 encoded video_id parameter. - """ - header_magic = b'\x0A\x0F\x1A\x0D\x0A' - header_id = video_id.encode() - header_terminator = b'\x20\x01' - - item = [ - header_magic, - _nval(len(header_id)), - header_id, - header_terminator - ] - - return urllib.parse.quote( - b64enc(reduce(lambda x, y: x + y, item)).decode() - ).encode() - - -def _nval(val): - """convert value to byte array""" - if val < 0: - raise ValueError - buf = b'' - while val >> 7: - m = val & 0xFF | 0x80 - buf += m.to_bytes(1, 'big') - val >>= 7 - buf += val.to_bytes(1, 'big') - return buf - - -def _build(video_id, seektime, topchat_only): - switch_01 = b'\x04' if topchat_only else b'\x01' - if seektime < 0: - raise ValueError("seektime must be greater than or equal to zero.") - if seektime == 0: - times = b'' - else: - times = _nval(int(seektime * 1000)) - if seektime > 0: - _len_time = b'\x5A' + (len(times) + 1).to_bytes(1, 'big') + b'\x10' - else: - _len_time = b'' - - header_magic = b'\xA2\x9D\xB0\xD3\x04' - sep_0 = b'\x1A' - vid = _gen_vid(video_id) - _tag = b'\x40\x01' - timestamp1 = times - sep_1 = b'\x60\x04\x72\x02\x08' - terminator = b'\x78\x01' - - body = [ - sep_0, - _nval(len(vid)), - vid, - _tag, - _len_time, - timestamp1, - sep_1, - switch_01, - terminator - ] - - body = reduce(lambda x, y: x + y, body) - - return urllib.parse.quote( - b64enc(header_magic + _nval(len(body)) + body - ).decode() - ) - - -def getparam(video_id, seektime=0.0, topchat_only=False): - ''' - Parameter - --------- - seektime : int - unit:seconds - start position of fetching chat data. - topchat_only : bool - if True, fetch only 'top chat' - ''' - return _build(video_id, seektime, topchat_only) diff --git a/pytchat/parser/live.py b/pytchat/parser/live.py index 13540cb..c3e10b3 100644 --- a/pytchat/parser/live.py +++ b/pytchat/parser/live.py @@ -8,15 +8,26 @@ from .. import exceptions class Parser: + ''' + Parser of chat json. + + Parameter + ---------- + is_replay : bool - __slots__ = ['is_replay'] + exception_holder : Object [default:Npne] + The object holding exceptions. + This is passed from the parent livechat object. + ''' + __slots__ = ['is_replay', 'exception_holder'] - def __init__(self, is_replay): + def __init__(self, is_replay, exception_holder=None): self.is_replay = is_replay + self.exception_holder = exception_holder def get_contents(self, jsn): if jsn is None: - raise exceptions.IllegalFunctionCall('Called with none JSON object.') + self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.')) if jsn['response']['responseContext'].get('errors'): raise exceptions.ResponseContextError( 'The video_id would be wrong, or video is deleted or private.') @@ -42,11 +53,11 @@ class Parser: if contents is None: '''Broadcasting end or cannot fetch chat stream''' - raise exceptions.NoContents('Chat data stream is empty.') + self.raise_exception(exceptions.NoContents('Chat data stream is empty.')) cont = contents['liveChatContinuation']['continuations'][0] if cont is None: - raise exceptions.NoContinuation('No Continuation') + self.raise_exception(exceptions.NoContinuation('No Continuation')) metadata = (cont.get('invalidationContinuationData') or cont.get('timedContinuationData') or cont.get('reloadContinuationData') @@ -54,13 +65,13 @@ class Parser: ) if metadata is None: if cont.get("playerSeekContinuationData"): - raise exceptions.ChatDataFinished('Finished chat data') + self.raise_exception(exceptions.ChatDataFinished('Finished chat data')) unknown = list(cont.keys())[0] if unknown: - raise exceptions.ReceivedUnknownContinuation( - f"Received unknown continuation type:{unknown}") + self.raise_exception(exceptions.ReceivedUnknownContinuation( + f"Received unknown continuation type:{unknown}")) else: - raise exceptions.FailedExtractContinuation('Cannot extract continuation data') + self.raise_exception(exceptions.FailedExtractContinuation('Cannot extract continuation data')) return self._create_data(metadata, contents) def reload_continuation(self, contents): @@ -72,7 +83,7 @@ class Parser: """ if contents is None: '''Broadcasting end or cannot fetch chat stream''' - raise exceptions.NoContents('Chat data stream is empty.') + self.raise_exception(exceptions.NoContents('Chat data stream is empty.')) cont = contents['liveChatContinuation']['continuations'][0] if cont.get("liveChatReplayContinuationData"): # chat data exist. @@ -81,7 +92,7 @@ class Parser: init_cont = cont.get("playerSeekContinuationData") if init_cont: return init_cont.get("continuation") - raise exceptions.ChatDataFinished('Finished chat data') + self.raise_exception(exceptions.ChatDataFinished('Finished chat data')) def _create_data(self, metadata, contents): actions = contents['liveChatContinuation'].get('actions') @@ -103,3 +114,8 @@ class Parser: start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"]) last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"]) return (last - start) + + def raise_exception(self, exception): + if self.exception_holder is None: + raise exception + self.exception_holder = exception diff --git a/pytchat/processors/combinator.py b/pytchat/processors/combinator.py index 7784418..ff07da9 100644 --- a/pytchat/processors/combinator.py +++ b/pytchat/processors/combinator.py @@ -36,3 +36,7 @@ class Combinator(ChatProcessor): ''' return tuple(processor.process(chat_components) for processor in self.processors) + + def finalize(self, *args, **kwargs): + [processor.finalize(*args, **kwargs) + for processor in self.processors] diff --git a/pytchat/processors/default/custom_encoder.py b/pytchat/processors/default/custom_encoder.py new file mode 100644 index 0000000..1916d1b --- /dev/null +++ b/pytchat/processors/default/custom_encoder.py @@ -0,0 +1,11 @@ +import json +from .renderer.base import Author +from .renderer.paidmessage import Colors +from .renderer.paidsticker import Colors2 + + +class CustomEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, Author) or isinstance(obj, Colors) or isinstance(obj, Colors2): + return vars(obj) + return json.JSONEncoder.default(self, obj) diff --git a/pytchat/processors/default/processor.py b/pytchat/processors/default/processor.py index c4f8f47..d5e87a8 100644 --- a/pytchat/processors/default/processor.py +++ b/pytchat/processors/default/processor.py @@ -1,5 +1,7 @@ import asyncio +import json import time +from .custom_encoder import CustomEncoder from .renderer.textmessage import LiveChatTextMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer @@ -11,25 +13,120 @@ from ... import config logger = config.logger(__name__) +class Chat: + def json(self) -> str: + return json.dumps(vars(self), ensure_ascii=False, cls=CustomEncoder) + + class Chatdata: - def __init__(self, chatlist: list, timeout: float): + + def __init__(self, chatlist: list, timeout: float, abs_diff): self.items = chatlist self.interval = timeout + self.abs_diff = abs_diff + self.itemcount = 0 def tick(self): - if self.interval == 0: + '''DEPRECATE + Use sync_items() + ''' + if len(self.items) < 1: time.sleep(1) return - time.sleep(self.interval / len(self.items)) + if self.itemcount == 0: + self.starttime = time.time() + if len(self.items) == 1: + total_itemcount = 1 + else: + total_itemcount = len(self.items) - 1 + next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000 + tobe_disptime = self.abs_diff + next_chattime + wait_sec = tobe_disptime - time.time() + self.itemcount += 1 + + if wait_sec < 0: + wait_sec = 0 + + time.sleep(wait_sec) async def tick_async(self): - if self.interval == 0: + '''DEPRECATE + Use async_items() + ''' + if len(self.items) < 1: await asyncio.sleep(1) return - await asyncio.sleep(self.interval / len(self.items)) + if self.itemcount == 0: + self.starttime = time.time() + if len(self.items) == 1: + total_itemcount = 1 + else: + total_itemcount = len(self.items) - 1 + next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000 + tobe_disptime = self.abs_diff + next_chattime + wait_sec = tobe_disptime - time.time() + self.itemcount += 1 + + if wait_sec < 0: + wait_sec = 0 + + await asyncio.sleep(wait_sec) + + def sync_items(self): + starttime = time.time() + if len(self.items) > 0: + last_chattime = self.items[-1].timestamp / 1000 + tobe_disptime = self.abs_diff + last_chattime + wait_total_sec = max(tobe_disptime - time.time(), 0) + if len(self.items) > 1: + wait_sec = wait_total_sec / len(self.items) + elif len(self.items) == 1: + wait_sec = 0 + for c in self.items: + if wait_sec < 0: + wait_sec = 0 + time.sleep(wait_sec) + yield c + stop_interval = time.time() - starttime + if stop_interval < 1: + time.sleep(1 - stop_interval) + + async def async_items(self): + starttime = time.time() + if len(self.items) > 0: + last_chattime = self.items[-1].timestamp / 1000 + tobe_disptime = self.abs_diff + last_chattime + wait_total_sec = max(tobe_disptime - time.time(), 0) + if len(self.items) > 1: + wait_sec = wait_total_sec / len(self.items) + elif len(self.items) == 1: + wait_sec = 0 + for c in self.items: + if wait_sec < 0: + wait_sec = 0 + await asyncio.sleep(wait_sec) + yield c + + stop_interval = time.time() - starttime + if stop_interval < 1: + await asyncio.sleep(1 - stop_interval) + + def json(self) -> str: + return json.dumps([vars(a) for a in self.items], ensure_ascii=False, cls=CustomEncoder) class DefaultProcessor(ChatProcessor): + def __init__(self): + self.first = True + self.abs_diff = 0 + self.renderers = { + "liveChatTextMessageRenderer": LiveChatTextMessageRenderer(), + "liveChatPaidMessageRenderer": LiveChatPaidMessageRenderer(), + "liveChatPaidStickerRenderer": LiveChatPaidStickerRenderer(), + "liveChatLegacyPaidMessageRenderer": LiveChatLegacyPaidMessageRenderer(), + "liveChatMembershipItemRenderer": LiveChatMembershipItemRenderer() + } + def process(self, chat_components: list): chatlist = [] @@ -37,6 +134,8 @@ class DefaultProcessor(ChatProcessor): if chat_components: for component in chat_components: + if component is None: + continue timeout += component.get('timeout', 0) chatdata = component.get('chatdata') if chatdata is None: @@ -46,43 +145,35 @@ class DefaultProcessor(ChatProcessor): continue if action.get('addChatItemAction') is None: continue - if action['addChatItemAction'].get('item') is None: + item = action['addChatItemAction'].get('item') + if item is None: continue - - chat = self._parse(action) + chat = self._parse(item) if chat: chatlist.append(chat) - return Chatdata(chatlist, float(timeout)) + + if self.first and chatlist: + self.abs_diff = time.time() - chatlist[0].timestamp / 1000 + 2 + self.first = False - def _parse(self, sitem): - action = sitem.get("addChatItemAction") - if action: - item = action.get("item") - if item is None: - return None + chatdata = Chatdata(chatlist, float(timeout), self.abs_diff) + + return chatdata + + def _parse(self, item): try: - renderer = self._get_renderer(item) + key = list(item.keys())[0] + renderer = self.renderers.get(key) if renderer is None: return None - + renderer.setitem(item.get(key), Chat()) + renderer.settype() renderer.get_snippet() renderer.get_authordetails() + rendered_chatobj = renderer.get_chatobj() + renderer.clear() except (KeyError, TypeError) as e: - logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}") + logger.error(f"{str(type(e))}-{str(e)} item:{str(item)}") return None - return renderer - - def _get_renderer(self, item): - if item.get("liveChatTextMessageRenderer"): - renderer = LiveChatTextMessageRenderer(item) - elif item.get("liveChatPaidMessageRenderer"): - renderer = LiveChatPaidMessageRenderer(item) - elif item.get("liveChatPaidStickerRenderer"): - renderer = LiveChatPaidStickerRenderer(item) - elif item.get("liveChatLegacyPaidMessageRenderer"): - renderer = LiveChatLegacyPaidMessageRenderer(item) - elif item.get("liveChatMembershipItemRenderer"): - renderer = LiveChatMembershipItemRenderer(item) - else: - renderer = None - return renderer + + return rendered_chatobj diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py index 64fbecc..d6826c9 100644 --- a/pytchat/processors/default/renderer/base.py +++ b/pytchat/processors/default/renderer/base.py @@ -6,89 +6,96 @@ class Author: class BaseRenderer: - def __init__(self, item, chattype): - self.renderer = list(item.values())[0] - self.chattype = chattype - self.author = Author() + def setitem(self, item, chat): + self.item = item + self.chat = chat + self.chat.author = Author() + + def settype(self): + pass def get_snippet(self): - self.type = self.chattype - self.id = self.renderer.get('id') - timestampUsec = int(self.renderer.get("timestampUsec", 0)) - self.timestamp = int(timestampUsec / 1000) - tst = self.renderer.get("timestampText") + self.chat.id = self.item.get('id') + timestampUsec = int(self.item.get("timestampUsec", 0)) + self.chat.timestamp = int(timestampUsec / 1000) + tst = self.item.get("timestampText") if tst: - self.elapsedTime = tst.get("simpleText") + self.chat.elapsedTime = tst.get("simpleText") else: - self.elapsedTime = "" - self.datetime = self.get_datetime(timestampUsec) - self.message, self.messageEx = self.get_message(self.renderer) - self.id = self.renderer.get('id') - self.amountValue = 0.0 - self.amountString = "" - self.currency = "" - self.bgColor = 0 + self.chat.elapsedTime = "" + self.chat.datetime = self.get_datetime(timestampUsec) + self.chat.message, self.chat.messageEx = self.get_message(self.item) + self.chat.id = self.item.get('id') + self.chat.amountValue = 0.0 + self.chat.amountString = "" + self.chat.currency = "" + self.chat.bgColor = 0 def get_authordetails(self): - self.author.badgeUrl = "" - (self.author.isVerified, - self.author.isChatOwner, - self.author.isChatSponsor, - self.author.isChatModerator) = ( - self.get_badges(self.renderer) + self.chat.author.badgeUrl = "" + (self.chat.author.isVerified, + self.chat.author.isChatOwner, + self.chat.author.isChatSponsor, + self.chat.author.isChatModerator) = ( + self.get_badges(self.item) ) - self.author.channelId = self.renderer.get("authorExternalChannelId") - self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId - self.author.name = self.renderer["authorName"]["simpleText"] - self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"] + self.chat.author.channelId = self.item.get("authorExternalChannelId") + self.chat.author.channelUrl = "http://www.youtube.com/channel/" + self.chat.author.channelId + self.chat.author.name = self.item["authorName"]["simpleText"] + self.chat.author.imageUrl = self.item["authorPhoto"]["thumbnails"][1]["url"] - def get_message(self, renderer): + def get_message(self, item): message = '' message_ex = [] - if renderer.get("message"): - runs = renderer["message"].get("runs") - if runs: - for r in runs: - if r: - if r.get('emoji'): - message += r['emoji'].get('shortcuts', [''])[0] - message_ex.append({ - 'id': r['emoji'].get('emojiId').split('/')[-1], - 'txt': r['emoji'].get('shortcuts', [''])[0], - 'url': r['emoji']['image']['thumbnails'][0].get('url') - }) - else: - message += r.get('text', '') - message_ex.append(r.get('text', '')) + runs = item.get("message", {}).get("runs", {}) + for r in runs: + if not hasattr(r, "get"): + continue + if r.get('emoji'): + message += r['emoji'].get('shortcuts', [''])[0] + message_ex.append({ + 'id': r['emoji'].get('emojiId').split('/')[-1], + 'txt': r['emoji'].get('shortcuts', [''])[0], + 'url': r['emoji']['image']['thumbnails'][0].get('url') + }) + else: + message += r.get('text', '') + message_ex.append(r.get('text', '')) return message, message_ex def get_badges(self, renderer): - self.author.type = '' + self.chat.author.type = '' isVerified = False isChatOwner = False isChatSponsor = False isChatModerator = False - badges = renderer.get("authorBadges") - if badges: - for badge in badges: - if badge["liveChatAuthorBadgeRenderer"].get("icon"): - author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] - self.author.type = author_type - if author_type == 'VERIFIED': - isVerified = True - if author_type == 'OWNER': - isChatOwner = True - if author_type == 'MODERATOR': - isChatModerator = True - if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"): - isChatSponsor = True - self.author.type = 'MEMBER' - self.get_badgeurl(badge) + badges = renderer.get("authorBadges", {}) + for badge in badges: + if badge["liveChatAuthorBadgeRenderer"].get("icon"): + author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] + self.chat.author.type = author_type + if author_type == 'VERIFIED': + isVerified = True + if author_type == 'OWNER': + isChatOwner = True + if author_type == 'MODERATOR': + isChatModerator = True + if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"): + isChatSponsor = True + self.chat.author.type = 'MEMBER' + self.get_badgeurl(badge) return isVerified, isChatOwner, isChatSponsor, isChatModerator def get_badgeurl(self, badge): - self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] + self.chat.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] def get_datetime(self, timestamp): dt = datetime.fromtimestamp(timestamp / 1000000) return dt.strftime('%Y-%m-%d %H:%M:%S') + + def get_chatobj(self): + return self.chat + + def clear(self): + self.item = None + self.chat = None diff --git a/pytchat/processors/default/renderer/legacypaid.py b/pytchat/processors/default/renderer/legacypaid.py index dc6b2e4..f3b31c4 100644 --- a/pytchat/processors/default/renderer/legacypaid.py +++ b/pytchat/processors/default/renderer/legacypaid.py @@ -2,14 +2,14 @@ from .base import BaseRenderer class LiveChatLegacyPaidMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "newSponsor") + def settype(self): + self.chat.type = "newSponsor" def get_authordetails(self): super().get_authordetails() - self.author.isChatSponsor = True + self.chat.author.isChatSponsor = True - def get_message(self, renderer): - message = (renderer["eventText"]["runs"][0]["text"] - ) + ' / ' + (renderer["detailText"]["simpleText"]) + def get_message(self, item): + message = (item["eventText"]["runs"][0]["text"] + ) + ' / ' + (item["detailText"]["simpleText"]) return message, [message] diff --git a/pytchat/processors/default/renderer/membership.py b/pytchat/processors/default/renderer/membership.py index 7a7d100..f198abf 100644 --- a/pytchat/processors/default/renderer/membership.py +++ b/pytchat/processors/default/renderer/membership.py @@ -2,14 +2,17 @@ from .base import BaseRenderer class LiveChatMembershipItemRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "newSponsor") + def settype(self): + self.chat.type = "newSponsor" def get_authordetails(self): super().get_authordetails() - self.author.isChatSponsor = True + self.chat.author.isChatSponsor = True - def get_message(self, renderer): - message = ''.join([mes.get("text", "") - for mes in renderer["headerSubtext"]["runs"]]) + def get_message(self, item): + try: + message = ''.join([mes.get("text", "") + for mes in item["headerSubtext"]["runs"]]) + except KeyError: + return "Welcome New Member!", ["Welcome New Member!"] return message, [message] diff --git a/pytchat/processors/default/renderer/paidmessage.py b/pytchat/processors/default/renderer/paidmessage.py index 70bd055..19ae001 100644 --- a/pytchat/processors/default/renderer/paidmessage.py +++ b/pytchat/processors/default/renderer/paidmessage.py @@ -9,23 +9,23 @@ class Colors: class LiveChatPaidMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "superChat") + def settype(self): + self.chat.type = "superChat" def get_snippet(self): super().get_snippet() amountDisplayString, symbol, amount = ( - self.get_amountdata(self.renderer) + self.get_amountdata(self.item) ) - self.amountValue = amount - self.amountString = amountDisplayString - self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + self.chat.amountValue = amount + self.chat.amountString = amountDisplayString + self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( symbol) else symbol - self.bgColor = self.renderer.get("bodyBackgroundColor", 0) - self.colors = self.get_colors() + self.chat.bgColor = self.item.get("bodyBackgroundColor", 0) + self.chat.colors = self.get_colors() - def get_amountdata(self, renderer): - amountDisplayString = renderer["purchaseAmountText"]["simpleText"] + def get_amountdata(self, item): + amountDisplayString = item["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) @@ -36,11 +36,12 @@ class LiveChatPaidMessageRenderer(BaseRenderer): return amountDisplayString, symbol, amount def get_colors(self): + item = self.item colors = Colors() - colors.headerBackgroundColor = self.renderer.get("headerBackgroundColor", 0) - colors.headerTextColor = self.renderer.get("headerTextColor", 0) - colors.bodyBackgroundColor = self.renderer.get("bodyBackgroundColor", 0) - colors.bodyTextColor = self.renderer.get("bodyTextColor", 0) - colors.timestampColor = self.renderer.get("timestampColor", 0) - colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) + colors.headerBackgroundColor = item.get("headerBackgroundColor", 0) + colors.headerTextColor = item.get("headerTextColor", 0) + colors.bodyBackgroundColor = item.get("bodyBackgroundColor", 0) + colors.bodyTextColor = item.get("bodyTextColor", 0) + colors.timestampColor = item.get("timestampColor", 0) + colors.authorNameTextColor = item.get("authorNameTextColor", 0) return colors diff --git a/pytchat/processors/default/renderer/paidsticker.py b/pytchat/processors/default/renderer/paidsticker.py index 6a52a7e..cfa4fa1 100644 --- a/pytchat/processors/default/renderer/paidsticker.py +++ b/pytchat/processors/default/renderer/paidsticker.py @@ -4,30 +4,30 @@ from .base import BaseRenderer superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") -class Colors: +class Colors2: pass class LiveChatPaidStickerRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "superSticker") + def settype(self): + self.chat.type = "superSticker" def get_snippet(self): super().get_snippet() amountDisplayString, symbol, amount = ( - self.get_amountdata(self.renderer) + self.get_amountdata(self.item) ) - self.amountValue = amount - self.amountString = amountDisplayString - self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( + self.chat.amountValue = amount + self.chat.amountString = amountDisplayString + self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( symbol) else symbol - self.bgColor = self.renderer.get("backgroundColor", 0) - self.sticker = "".join(("https:", - self.renderer["sticker"]["thumbnails"][0]["url"])) - self.colors = self.get_colors() + self.chat.bgColor = self.item.get("backgroundColor", 0) + self.chat.sticker = "".join(("https:", + self.item["sticker"]["thumbnails"][0]["url"])) + self.chat.colors = self.get_colors() - def get_amountdata(self, renderer): - amountDisplayString = renderer["purchaseAmountText"]["simpleText"] + def get_amountdata(self, item): + amountDisplayString = item["purchaseAmountText"]["simpleText"] m = superchat_regex.search(amountDisplayString) if m: symbol = m.group(1) @@ -38,9 +38,10 @@ class LiveChatPaidStickerRenderer(BaseRenderer): return amountDisplayString, symbol, amount def get_colors(self): - colors = Colors() - colors.moneyChipBackgroundColor = self.renderer.get("moneyChipBackgroundColor", 0) - colors.moneyChipTextColor = self.renderer.get("moneyChipTextColor", 0) - colors.backgroundColor = self.renderer.get("backgroundColor", 0) - colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) + item = self.item + colors = Colors2() + colors.moneyChipBackgroundColor = item.get("moneyChipBackgroundColor", 0) + colors.moneyChipTextColor = item.get("moneyChipTextColor", 0) + colors.backgroundColor = item.get("backgroundColor", 0) + colors.authorNameTextColor = item.get("authorNameTextColor", 0) return colors diff --git a/pytchat/processors/default/renderer/textmessage.py b/pytchat/processors/default/renderer/textmessage.py index 475a70d..608cfef 100644 --- a/pytchat/processors/default/renderer/textmessage.py +++ b/pytchat/processors/default/renderer/textmessage.py @@ -2,5 +2,5 @@ from .base import BaseRenderer class LiveChatTextMessageRenderer(BaseRenderer): - def __init__(self, item): - super().__init__(item, "textMessage") + def settype(self): + self.chat.type = "textMessage" diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 95ada14..f969a3a 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -3,7 +3,7 @@ import os import re import time from base64 import standard_b64encode -from httpx import NetworkError, ReadTimeout +from concurrent.futures import ThreadPoolExecutor from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor from ..exceptions import UnknownConnectionError @@ -48,12 +48,14 @@ class HTMLArchiver(ChatProcessor): ''' def __init__(self, save_path, callback=None): super().__init__() + self.client = httpx.Client(http2=True) self.save_path = self._checkpath(save_path) self.processor = DefaultProcessor() self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary. self.header = [HEADER_HTML] self.body = ['\n', '
|---|