Add simple core module
This commit is contained in:
@@ -2,13 +2,28 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.3.2'
|
__version__ = '0.4.0.dev1'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
__url__ = 'https://github.com/taizan-hokuto/pytchat'
|
__url__ = 'https://github.com/taizan-hokuto/pytchat'
|
||||||
|
|
||||||
__all__ = ["core_async","core_multithread","processors"]
|
|
||||||
|
from .exceptions import (
|
||||||
|
ChatParseException,
|
||||||
|
ResponseContextError,
|
||||||
|
NoContents,
|
||||||
|
NoContinuation,
|
||||||
|
IllegalFunctionCall,
|
||||||
|
InvalidVideoIdException,
|
||||||
|
UnknownConnectionError,
|
||||||
|
RetryExceedMaxCount,
|
||||||
|
ChatDataFinished,
|
||||||
|
ReceivedUnknownContinuation,
|
||||||
|
FailedExtractContinuation,
|
||||||
|
VideoInfoParseError,
|
||||||
|
PatternUnmatchError
|
||||||
|
)
|
||||||
|
|
||||||
from .api import (
|
from .api import (
|
||||||
cli,
|
cli,
|
||||||
@@ -26,7 +41,7 @@ from .api import (
|
|||||||
SimpleDisplayProcessor,
|
SimpleDisplayProcessor,
|
||||||
SpeedCalculator,
|
SpeedCalculator,
|
||||||
SuperchatCalculator,
|
SuperchatCalculator,
|
||||||
VideoInfo
|
VideoInfo,
|
||||||
|
create
|
||||||
)
|
)
|
||||||
|
|
||||||
# flake8: noqa
|
# flake8: noqa
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
from . import cli
|
from . import cli
|
||||||
from . import config
|
from . import config
|
||||||
|
from .core import create
|
||||||
from .core_multithread.livechat import LiveChat
|
from .core_multithread.livechat import LiveChat
|
||||||
from .core_async.livechat import LiveChatAsync
|
from .core_async.livechat import LiveChatAsync
|
||||||
from .processors.chat_processor import ChatProcessor
|
from .processors.chat_processor import ChatProcessor
|
||||||
@@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator
|
|||||||
from .tool.extract.extractor import Extractor
|
from .tool.extract.extractor import Extractor
|
||||||
from .tool.videoinfo import VideoInfo
|
from .tool.videoinfo import VideoInfo
|
||||||
|
|
||||||
|
# Public names exported by a star-import of this module.
# Entries must be strings: listing the imported objects themselves makes
# ``from ... import *`` fail with TypeError, because the import machinery
# looks each entry up by name.
__all__ = [
    "cli",
    "config",
    "LiveChat",
    "LiveChatAsync",
    "ChatProcessor",
    "CompatibleProcessor",
    "DummyProcessor",
    "DefaultProcessor",
    "Extractor",
    "HTMLArchiver",
    "TSVArchiver",
    "JsonfileArchiver",
    "SimpleDisplayProcessor",
    "SpeedCalculator",
    "SuperchatCalculator",
    "VideoInfo",
    "create",
]
|
||||||
|
|
||||||
# flake8: noqa
|
# flake8: noqa
|
||||||
@@ -5,6 +5,6 @@ headers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def logger(module_name: str, loglevel=None):
|
def logger(module_name: str, loglevel=logging.DEBUG):
|
||||||
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
|
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
|
||||||
return module_logger
|
return module_logger
|
||||||
|
|||||||
9
pytchat/core/__init__.py
Normal file
9
pytchat/core/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from .pytchat import PytchatCore
|
||||||
|
from .. util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
|
|
||||||
|
def create(video_id: str, **kwargs):
    """Build a PytchatCore for the given video.

    `video_id` is first passed through `extract_video_id`; every keyword
    argument is forwarded unchanged to the PytchatCore constructor.
    """
    return PytchatCore(extract_video_id(video_id), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
192
pytchat/core/pytchat.py
Normal file
192
pytchat/core/pytchat.py
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import httpx
|
||||||
|
import json
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
import urllib.parse
|
||||||
|
from threading import Event
|
||||||
|
from ..parser.live import Parser
|
||||||
|
from .. import config
|
||||||
|
from .. import exceptions
|
||||||
|
from ..paramgen import liveparam, arcparam
|
||||||
|
from ..processors.default.processor import DefaultProcessor
|
||||||
|
from ..processors.combinator import Combinator
|
||||||
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
|
headers = config.headers
|
||||||
|
MAX_RETRY = 10
|
||||||
|
|
||||||
|
|
||||||
|
class PytchatCore:
    '''Pull-style chat fetcher: each call to `get()` performs one fetch
    and hands the fetched chat component to the configured processor.

    Parameter
    ---------
    video_id : str
        Passed through `extract_video_id` before use.

    seektime : int
        start position of fetching chat (seconds).
        This option is valid for archived chat only.
        If negative value, chat data posted before the start of the broadcast
        will be retrieved as well.

    processor : ChatProcessor
        Object whose `process()` receives the fetched chat components and
        whose `finalize()` is called by `terminate()`.
        A tuple of processors is wrapped in a `Combinator`.
        If omitted (None), a new `DefaultProcessor` is created for this
        instance, so instances never share processor state.

    interruptable : bool
        Allows keyboard interrupts.
        Set this parameter to False if your own threading program causes
        the problem.

    force_replay : bool
        force to fetch archived chat data, even if specified video is live.

    topchat_only : bool
        If True, get only top chat.

    logger
        Logger used for debug / error messages.

    Attributes
    ---------
    _is_alive : bool
        Flag to stop getting chat.
    '''

    _setup_finished = False

    def __init__(self, video_id,
                 seektime=-1,
                 processor=None,
                 interruptable=True,
                 force_replay=False,
                 topchat_only=False,
                 logger=config.logger(__name__)
                 ):
        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        if processor is None:
            # A default written in the signature would be evaluated once
            # and shared by every instance; build a fresh one instead.
            processor = DefaultProcessor()
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
        else:
            self.processor = processor
        self._is_alive = True
        self._is_replay = force_replay
        self._parser = Parser(is_replay=self._is_replay)
        self._first_fetch = True
        self._fetch_url = "live_chat/get_live_chat?continuation="
        self._topchat_only = topchat_only
        self._logger = logger
        self.exception = None
        if interruptable:
            # Ctrl-C stops fetching instead of raising KeyboardInterrupt.
            signal.signal(signal.SIGINT, lambda a, b: self.terminate())
        self._setup()

    def _setup(self):
        """Build the first continuation parameter for the live chat."""
        time.sleep(0.1)  # sleep shortly to prohibit skipping fetching data
        # NOTE(review): the meaning of the literal 3 is defined by
        # liveparam.getparam -- confirm against that module.
        self.continuation = liveparam.getparam(self._video_id, 3)

    def _get_chat_component(self):
        '''Fetch one chunk of chat data and advance `self.continuation`.

        Return
        ------
        dict with the keys "video_id", "timeout" (the metadata's
        `timeoutMs` divided by 1000) and "chatdata".

        Raises
        ------
        exceptions.ChatDataFinished
            if there is no continuation left or the instance is no longer
            alive.
        exceptions.ChatParseException, TypeError, json.JSONDecodeError
            logged and re-raised.
        '''
        if not (self.continuation and self._is_alive):
            self._logger.debug(f"[{self._video_id}]finished fetching chat.")
            raise exceptions.ChatDataFinished

        try:
            with httpx.Client(http2=True) as client:
                contents = self._get_contents(self.continuation, client, headers)
                metadata, chatdata = self._parser.parse(contents)
                timeout = metadata['timeoutMs'] / 1000
                chat_component = {
                    "video_id": self._video_id,
                    "timeout": timeout,
                    "chatdata": chatdata
                }
                self.continuation = metadata.get('continuation')
                return chat_component
        except exceptions.ChatParseException as e:
            self._logger.debug(f"[{self._video_id}]{str(e)}")
            raise
        except (TypeError, json.JSONDecodeError):
            self._logger.error(f"{traceback.format_exc(limit=-1)}")
            raise

    def _get_contents(self, continuation, client, headers):
        '''Get 'continuationContents' from livechat json.
        If contents is None at first fetching,
        try to fetch archive chat data.

        Return:
        -------
            'continuationContents' which includes metadata & chat data.
        '''
        livechat_json = (
            self._get_livechat_json(continuation, client, headers)
        )
        contents = self._parser.get_contents(livechat_json)
        if self._first_fetch:
            if contents is None or self._is_replay:
                # Try to fetch archive chat data.
                self._parser.is_replay = True
                self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
                continuation = arcparam.getparam(
                    self._video_id, self.seektime, self._topchat_only)
                livechat_json = (self._get_livechat_json(continuation, client, headers))
                reload_continuation = self._parser.reload_continuation(
                    self._parser.get_contents(livechat_json))
                if reload_continuation:
                    livechat_json = (self._get_livechat_json(
                        reload_continuation, client, headers))
                contents = self._parser.get_contents(livechat_json)
                self._is_replay = True
            self._first_fetch = False
        return contents

    def _get_livechat_json(self, continuation, client, headers):
        '''
        Get json which includes chat data.

        The request is attempted up to MAX_RETRY + 1 times, sleeping two
        seconds after each failed attempt.

        Raises
        ------
        exceptions.RetryExceedMaxCount
            if every attempt failed.
        '''
        continuation = urllib.parse.quote(continuation)
        livechat_json = None
        status_code = 0
        url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
        for _ in range(MAX_RETRY + 1):
            # The client is owned (opened and closed) by the caller; it must
            # not be used as a context manager here, because leaving the
            # `with` block would close it before the next request.
            try:
                response = client.get(url, headers=headers)
                # Remember the last status so the error log below is useful.
                status_code = response.status_code
                livechat_json = response.json()
                break
            except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError):
                time.sleep(2)
                continue
        else:
            # Reached only when the loop never hit `break`.
            self._logger.error(f"[{self._video_id}]"
                               f"Exceeded retry count. status_code={status_code}")
            raise exceptions.RetryExceedMaxCount()
        return livechat_json

    def get(self):
        '''Fetch the next chat component and return the processor's output.

        Returns an empty list once the instance has been terminated.
        '''
        if self.is_alive():
            chat_component = self._get_chat_component()
            return self.processor.process([chat_component])
        else:
            return []

    def is_replay(self):
        '''Return True if archived (replay) chat is being fetched.'''
        return self._is_replay

    def is_alive(self):
        '''Return False once `terminate()` has been called.'''
        return self._is_alive

    def terminate(self):
        '''Stop fetching and finalize the processor.'''
        self._is_alive = False
        self.processor.finalize()

    def raise_for_status(self):
        '''Raise the exception stored in `self.exception`, if any.

        `self.exception` is initialised to None and is not assigned
        anywhere else in this class.
        '''
        if self.exception is not None:
            raise self.exception
|
||||||
Reference in New Issue
Block a user