From 4905b1e4d817b1a14659d2d71e5a27b662325694 Mon Sep 17 00:00:00 2001 From: taizan-hokouto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sat, 24 Oct 2020 18:07:54 +0900 Subject: [PATCH] Add simple core module --- pytchat/__init__.py | 23 ++++- pytchat/api.py | 21 ++++ pytchat/config/__init__.py | 2 +- pytchat/core/__init__.py | 9 ++ pytchat/core/pytchat.py | 192 +++++++++++++++++++++++++++++++++++++ 5 files changed, 242 insertions(+), 5 deletions(-) create mode 100644 pytchat/core/__init__.py create mode 100644 pytchat/core/pytchat.py diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 3343b8c..788ae45 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,13 +2,28 @@ pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.3.2' +__version__ = '0.4.0.dev1' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __url__ = 'https://github.com/taizan-hokuto/pytchat' -__all__ = ["core_async","core_multithread","processors"] + +from .exceptions import ( + ChatParseException, + ResponseContextError, + NoContents, + NoContinuation, + IllegalFunctionCall, + InvalidVideoIdException, + UnknownConnectionError, + RetryExceedMaxCount, + ChatDataFinished, + ReceivedUnknownContinuation, + FailedExtractContinuation, + VideoInfoParseError, + PatternUnmatchError +) from .api import ( cli, @@ -26,7 +41,7 @@ from .api import ( SimpleDisplayProcessor, SpeedCalculator, SuperchatCalculator, - VideoInfo + VideoInfo, + create ) - # flake8: noqa \ No newline at end of file diff --git a/pytchat/api.py b/pytchat/api.py index 7c67436..bf64e07 100644 --- a/pytchat/api.py +++ b/pytchat/api.py @@ -1,5 +1,6 @@ from . import cli from . 
import config +from .core import create from .core_multithread.livechat import LiveChat from .core_async.livechat import LiveChatAsync from .processors.chat_processor import ChatProcessor @@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator from .tool.extract.extractor import Extractor from .tool.videoinfo import VideoInfo +__all__ = [ + cli, + config, + LiveChat, + LiveChatAsync, + ChatProcessor, + CompatibleProcessor, + DummyProcessor, + DefaultProcessor, + Extractor, + HTMLArchiver, + TSVArchiver, + JsonfileArchiver, + SimpleDisplayProcessor, + SpeedCalculator, + SuperchatCalculator, + VideoInfo, + create +] + # flake8: noqa \ No newline at end of file diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index 3a86a8a..215ebdf 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -5,6 +5,6 @@ headers = { } -def logger(module_name: str, loglevel=None): +def logger(module_name: str, loglevel=logging.DEBUG): module_logger = mylogger.get_logger(module_name, loglevel=loglevel) return module_logger diff --git a/pytchat/core/__init__.py b/pytchat/core/__init__.py new file mode 100644 index 0000000..94f43a5 --- /dev/null +++ b/pytchat/core/__init__.py @@ -0,0 +1,9 @@ +from .pytchat import PytchatCore +from .. util.extract_video_id import extract_video_id + + +def create(video_id: str, **kwargs): + _vid = extract_video_id(video_id) + return PytchatCore(_vid, **kwargs) + + diff --git a/pytchat/core/pytchat.py b/pytchat/core/pytchat.py new file mode 100644 index 0000000..a1c9942 --- /dev/null +++ b/pytchat/core/pytchat.py @@ -0,0 +1,192 @@ +import httpx +import json +import signal +import time +import traceback +import urllib.parse +from threading import Event +from ..parser.live import Parser +from .. import config +from .. 
from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id

headers = config.headers
MAX_RETRY = 10


class PytchatCore:
    '''Fetch YouTube chat data one polling step at a time via get().

    Parameter
    ---------
    video_id : str
        Passed through extract_video_id() before use.

    seektime : int
        start position of fetching chat (seconds).
        This option is valid for archived chat only.
        If negative value, chat data posted before the start of the broadcast
        will be retrieved as well.

    processor : ChatProcessor
        Processor applied to every fetched chat component.
        A tuple of processors is wrapped in a Combinator.
        If omitted (None), a new DefaultProcessor is created for this
        instance, so instances never share processor state.

    interruptable : bool
        Allows keyboard interrupts (installs a SIGINT handler that calls
        terminate()).
        Set this parameter to False if your own threading program causes
        the problem.

    force_replay : bool
        force to fetch archived chat data, even if specified video is live.

    topchat_only : bool
        If True, get only top chat.

    logger
        Logger used for debug / error messages of this instance.

    Attributes
    ---------
    _is_alive : bool
        Flag to stop getting chat.

    continuation : str
        Continuation parameter used for the next fetch.

    exception : Exception or None
        Exception re-raised by raise_for_status(); initialised to None.
    '''

    _setup_finished = False

    def __init__(self, video_id,
                 seektime=-1,
                 processor=None,
                 interruptable=True,
                 force_replay=False,
                 topchat_only=False,
                 logger=config.logger(__name__)
                 ):
        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        # A default written as `processor=DefaultProcessor()` would be built
        # once at definition time and shared by every instance; build it here.
        if processor is None:
            processor = DefaultProcessor()
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
        else:
            self.processor = processor
        self._is_alive = True
        self._is_replay = force_replay
        self._parser = Parser(is_replay=self._is_replay)
        self._first_fetch = True
        self._fetch_url = "live_chat/get_live_chat?continuation="
        self._topchat_only = topchat_only
        self._logger = logger
        self.exception = None
        if interruptable:
            # signal.signal() may only be called from the main thread, which
            # is why callers running in other threads must pass
            # interruptable=False.
            signal.signal(signal.SIGINT, lambda a, b: self.terminate())
        self._setup()

    def _setup(self):
        '''Create the first continuation parameter for live chat.'''
        time.sleep(0.1)  # sleep shortly to prohibit skipping fetching data
        # NOTE(review): the meaning of the literal 3 is defined by
        # liveparam.getparam and is not visible from this module.
        self.continuation = liveparam.getparam(self._video_id, 3)

    def _get_chat_component(self):
        '''Fetch one chunk of chat data and advance the continuation.

        Return
        ------
        dict with the keys "video_id", "timeout" (metadata 'timeoutMs'
        divided by 1000) and "chatdata".

        Raise
        -----
        exceptions.ChatDataFinished
            when there is no continuation left or terminate() was called.
        exceptions.ChatParseException, TypeError, json.JSONDecodeError
            logged and re-raised unchanged.
        '''
        if not (self.continuation and self._is_alive):
            self._logger.debug(f"[{self._video_id}]finished fetching chat.")
            raise exceptions.ChatDataFinished

        try:
            with httpx.Client(http2=True) as client:
                contents = self._get_contents(self.continuation, client, headers)
                metadata, chatdata = self._parser.parse(contents)
                timeout = metadata['timeoutMs'] / 1000
                chat_component = {
                    "video_id": self._video_id,
                    "timeout": timeout,
                    "chatdata": chatdata
                }
                self.continuation = metadata.get('continuation')
                return chat_component
        except exceptions.ChatParseException as e:
            self._logger.debug(f"[{self._video_id}]{str(e)}")
            raise
        except (TypeError, json.JSONDecodeError):
            self._logger.error(f"{traceback.format_exc(limit=-1)}")
            raise

    def _get_contents(self, continuation, client, headers):
        '''Get 'continuationContents' from livechat json.
        If contents is None at first fetching (or replay is forced),
        try to fetch archive chat data.

        Parameter
        ---------
        continuation : str
            continuation parameter of the chunk to fetch.
        client : httpx.Client
            open client owned by the caller; it is not closed here.
        headers : dict
            HTTP headers sent with every request.

        Return:
        -------
            'continuationContents' which includes metadata & chat data.
        '''
        livechat_json = self._get_livechat_json(continuation, client, headers)
        contents = self._parser.get_contents(livechat_json)
        if self._first_fetch:
            if contents is None or self._is_replay:
                # Try to fetch archive chat data.
                self._parser.is_replay = True
                self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
                continuation = arcparam.getparam(
                    self._video_id, self.seektime, self._topchat_only)
                livechat_json = self._get_livechat_json(continuation, client, headers)
                reload_continuation = self._parser.reload_continuation(
                    self._parser.get_contents(livechat_json))
                if reload_continuation:
                    livechat_json = self._get_livechat_json(
                        reload_continuation, client, headers)
                contents = self._parser.get_contents(livechat_json)
                self._is_replay = True
            self._first_fetch = False
        return contents

    def _get_livechat_json(self, continuation, client, headers):
        '''
        Get json which includes chat data.

        The request is attempted up to MAX_RETRY + 1 times, sleeping
        2 seconds after each failed attempt.

        Raise
        -----
        exceptions.RetryExceedMaxCount
            when every attempt failed with a JSON decode error,
            a connect/read timeout or a connection error.
        '''
        continuation = urllib.parse.quote(continuation)
        livechat_json = None
        status_code = 0
        url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
        for _ in range(MAX_RETRY + 1):
            # The client is opened and closed by the caller; entering it
            # again here would close it after the first attempt.
            try:
                response = client.get(url, headers=headers)
                # Remember the last status so the failure log is meaningful.
                status_code = response.status_code
                livechat_json = response.json()
                break
            except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError):
                time.sleep(2)
                continue
        else:
            self._logger.error(f"[{self._video_id}]"
                               f"Exceeded retry count. status_code={status_code}")
            raise exceptions.RetryExceedMaxCount()
        return livechat_json

    def get(self):
        '''Fetch the next chunk of chat data and run it through the processor.

        Return
        ------
        The value returned by processor.process(), or an empty list once
        terminate() has been called.
        '''
        if self.is_alive():
            chat_component = self._get_chat_component()
            return self.processor.process([chat_component])
        else:
            return []

    def is_replay(self):
        '''Return True if archived (replay) chat is being fetched.'''
        return self._is_replay

    def is_alive(self):
        '''Return False once terminate() has been called.'''
        return self._is_alive

    def terminate(self):
        '''Stop fetching chat and finalize the processor.'''
        self._is_alive = False
        self.processor.finalize()

    def raise_for_status(self):
        '''Re-raise the exception stored in self.exception, if any.'''
        if self.exception is not None:
            raise self.exception