223 lines
7.9 KiB
Python
223 lines
7.9 KiB
Python
import httpx
|
|
import json
|
|
import signal
|
|
import time
|
|
import traceback
|
|
from ..parser.live import Parser
|
|
from .. import config
|
|
from .. import exceptions
|
|
from ..paramgen import liveparam, arcparam
|
|
from ..processors.default.processor import DefaultProcessor
|
|
from ..processors.combinator import Combinator
|
|
from .. import util
|
|
|
|
headers = config.headers
|
|
MAX_RETRY = 10
|
|
|
|
class PytchatCore:
|
|
'''
|
|
|
|
Parameter
|
|
---------
|
|
video_id : str
|
|
|
|
seektime : int
|
|
start position of fetching chat (seconds).
|
|
This option is valid for archived chat only.
|
|
If negative value, chat data posted before the start of the broadcast
|
|
will be retrieved as well.
|
|
|
|
processor : ChatProcessor
|
|
|
|
client : httpx.Client
|
|
The client for connecting youtube.
|
|
You can specify any customized httpx client (e.g. coolies, user agent).
|
|
|
|
interruptable : bool
|
|
Allows keyboard interrupts.
|
|
Set this parameter to False if your own multi-threading program causes
|
|
the problem.
|
|
|
|
force_replay : bool
|
|
force to fetch archived chat data, even if specified video is live.
|
|
|
|
topchat_only : bool
|
|
If True, get only top chat.
|
|
|
|
hold_exception : bool [default:True]
|
|
If True, when exceptions occur, the exception is held internally,
|
|
and can be raised by raise_for_status().
|
|
|
|
replay_continuation : str
|
|
If this parameter is not None, the processor will attempt to get chat data from continuation.
|
|
This parameter is only allowed in archived mode.
|
|
|
|
Attributes
|
|
---------
|
|
_is_alive : bool
|
|
Flag to stop getting chat.
|
|
'''
|
|
|
|
def __init__(self, video_id,
|
|
seektime=-1,
|
|
processor=DefaultProcessor(),
|
|
client = httpx.Client(http2=True),
|
|
interruptable=True,
|
|
force_replay=False,
|
|
topchat_only=False,
|
|
hold_exception=True,
|
|
logger=config.logger(__name__),
|
|
replay_continuation=None
|
|
):
|
|
self._client = client
|
|
self._video_id = util.extract_video_id(video_id)
|
|
self.seektime = seektime
|
|
if isinstance(processor, tuple):
|
|
self.processor = Combinator(processor)
|
|
else:
|
|
self.processor = processor
|
|
self._is_alive = True
|
|
self._is_replay = force_replay or (replay_continuation is not None)
|
|
self._hold_exception = hold_exception
|
|
self._exception_holder = None
|
|
self._parser = Parser(
|
|
is_replay=self._is_replay,
|
|
exception_holder=self._exception_holder
|
|
)
|
|
self._first_fetch = replay_continuation is None
|
|
self._fetch_url = config._sml if replay_continuation is None else config._smr
|
|
self._topchat_only = topchat_only
|
|
self._dat = ''
|
|
self._last_offset_ms = 0
|
|
self._logger = logger
|
|
self.continuation = replay_continuation
|
|
if interruptable:
|
|
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
|
|
self._setup()
|
|
|
|
def _setup(self):
|
|
if not self.continuation:
|
|
time.sleep(0.1) # sleep shortly to prohibit skipping fetching data
|
|
"""Fetch first continuation parameter,
|
|
create and start _listen loop.
|
|
"""
|
|
self.continuation = liveparam.getparam(
|
|
self._video_id,
|
|
channel_id=util.get_channelid(self._client, self._video_id),
|
|
past_sec=3)
|
|
|
|
def _get_chat_component(self):
|
|
''' Fetch chat data and store them into buffer,
|
|
get next continuaiton parameter and loop.
|
|
|
|
Parameter
|
|
---------
|
|
continuation : str
|
|
parameter for next chat data
|
|
'''
|
|
try:
|
|
if self.continuation and self._is_alive:
|
|
contents = self._get_contents(self.continuation, self._client, headers)
|
|
metadata, chatdata = self._parser.parse(contents)
|
|
timeout = metadata['timeoutMs'] / 1000
|
|
chat_component = {
|
|
"video_id": self._video_id,
|
|
"timeout": timeout,
|
|
"chatdata": chatdata
|
|
}
|
|
self.continuation = metadata.get('continuation')
|
|
self._last_offset_ms = metadata.get('last_offset_ms', 0)
|
|
return chat_component
|
|
except exceptions.ChatParseException as e:
|
|
self._logger.debug(f"[{self._video_id}]{str(e)}")
|
|
self._raise_exception(e)
|
|
except Exception as e:
|
|
self._logger.error(f"{traceback.format_exc(limit=-1)}")
|
|
self._raise_exception(e)
|
|
|
|
def _get_contents(self, continuation, client, headers):
|
|
'''Get 'continuationContents' from livechat json.
|
|
If contents is None at first fetching,
|
|
try to fetch archive chat data.
|
|
|
|
Return:
|
|
-------
|
|
'continuationContents' which includes metadata & chat data.
|
|
'''
|
|
livechat_json = self._get_livechat_json(
|
|
continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
|
|
contents, dat = self._parser.get_contents(livechat_json)
|
|
if self._dat == '' and dat:
|
|
self._dat = dat
|
|
if self._first_fetch:
|
|
if contents is None or self._is_replay:
|
|
'''Try to fetch archive chat data.'''
|
|
self._parser.is_replay = True
|
|
self._fetch_url = config._smr
|
|
continuation = arcparam.getparam(
|
|
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
|
|
livechat_json = self._get_livechat_json(
|
|
continuation, client, replay=True, offset_ms=self.seektime * 1000)
|
|
reload_continuation = self._parser.reload_continuation(
|
|
self._parser.get_contents(livechat_json)[0])
|
|
if reload_continuation:
|
|
livechat_json = (self._get_livechat_json(
|
|
reload_continuation, client, headers))
|
|
contents, _ = self._parser.get_contents(livechat_json)
|
|
self._is_replay = True
|
|
self._first_fetch = False
|
|
return contents
|
|
|
|
def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
|
|
'''
|
|
Get json which includes chat data.
|
|
'''
|
|
livechat_json = None
|
|
err = None
|
|
if offset_ms < 0:
|
|
offset_ms = 0
|
|
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
|
|
for _ in range(MAX_RETRY + 1):
|
|
try:
|
|
response = client.post(self._fetch_url, json=param)
|
|
livechat_json = response.json()
|
|
break
|
|
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
|
|
err = e
|
|
time.sleep(2)
|
|
continue
|
|
else:
|
|
self._logger.error(f"[{self._video_id}]"
|
|
f"Exceeded retry count. Last error: {str(err)}")
|
|
self._raise_exception(exceptions.RetryExceedMaxCount())
|
|
return livechat_json
|
|
|
|
def get(self):
|
|
if self.is_alive():
|
|
chat_component = self._get_chat_component()
|
|
return self.processor.process([chat_component])
|
|
else:
|
|
return []
|
|
|
|
def is_replay(self):
|
|
return self._is_replay
|
|
|
|
def is_alive(self):
|
|
return self._is_alive
|
|
|
|
def terminate(self):
|
|
if not self.is_alive():
|
|
return
|
|
self._is_alive = False
|
|
self.processor.finalize()
|
|
|
|
def raise_for_status(self):
|
|
if self._exception_holder is not None:
|
|
raise self._exception_holder
|
|
|
|
def _raise_exception(self, exception: Exception = None):
|
|
self.terminate()
|
|
if self._hold_exception is False:
|
|
raise exception
|
|
self._exception_holder = exception
|