355 lines
12 KiB
Python
355 lines
12 KiB
Python
|
|
import asyncio
|
|
import httpx
|
|
import json
|
|
import signal
|
|
import time
|
|
import traceback
|
|
from asyncio import Queue
|
|
from concurrent.futures import CancelledError
|
|
from .buffer import Buffer
|
|
from ..parser.live import Parser
|
|
from .. import config
|
|
from .. import exceptions
|
|
from .. import util
|
|
from ..paramgen import liveparam, arcparam
|
|
from ..processors.default.processor import DefaultProcessor
|
|
from ..processors.combinator import Combinator
|
|
|
|
headers = config.headers
|
|
MAX_RETRY = 10
|
|
|
|
|
|
class LiveChatAsync:
|
|
'''LiveChatAsync object fetches chat data and stores them
|
|
in a buffer with asyncio.
|
|
|
|
Parameter
|
|
---------
|
|
video_id : str
|
|
|
|
seektime : int
|
|
start position of fetching chat (seconds).
|
|
This option is valid for archived chat only.
|
|
If negative value, chat data posted before the start of the broadcast
|
|
will be retrieved as well.
|
|
|
|
processor : ChatProcessor
|
|
|
|
buffer : Buffer
|
|
buffer of chat data fetched background.
|
|
|
|
interruptable : bool
|
|
Allows keyboard interrupts.
|
|
Set this parameter to False if your own threading program causes
|
|
the problem.
|
|
|
|
callback : func
|
|
function called periodically from _listen().
|
|
|
|
done_callback : func
|
|
function called when listener ends.
|
|
|
|
exception_handler : func
|
|
|
|
direct_mode : bool
|
|
If True, invoke specified callback function without using buffer.
|
|
callback is required. If not, IllegalFunctionCall will be raised.
|
|
|
|
force_replay : bool
|
|
force to fetch archived chat data, even if specified video is live.
|
|
|
|
topchat_only : bool
|
|
If True, get only top chat.
|
|
|
|
Attributes
|
|
---------
|
|
_is_alive : bool
|
|
Flag to stop getting chat.
|
|
'''
|
|
|
|
_setup_finished = False
|
|
|
|
def __init__(self, video_id,
|
|
seektime=-1,
|
|
processor=DefaultProcessor(),
|
|
buffer=None,
|
|
interruptable=True,
|
|
callback=None,
|
|
done_callback=None,
|
|
exception_handler=None,
|
|
direct_mode=False,
|
|
force_replay=False,
|
|
topchat_only=False,
|
|
logger=config.logger(__name__)
|
|
):
|
|
self._video_id = util.extract_video_id(video_id)
|
|
self.seektime = seektime
|
|
if isinstance(processor, tuple):
|
|
self.processor = Combinator(processor)
|
|
else:
|
|
self.processor = processor
|
|
self._buffer = buffer
|
|
self._callback = callback
|
|
self._done_callback = done_callback
|
|
self._exception_handler = exception_handler
|
|
self._direct_mode = direct_mode
|
|
self._is_alive = True
|
|
self._is_replay = force_replay
|
|
self._parser = Parser(is_replay=self._is_replay)
|
|
self._pauser = Queue()
|
|
self._pauser.put_nowait(None)
|
|
self._first_fetch = True
|
|
self._fetch_url = config._sml
|
|
self._topchat_only = topchat_only
|
|
self._dat = ''
|
|
self._last_offset_ms = 0
|
|
self._logger = logger
|
|
self.exception = None
|
|
LiveChatAsync._logger = logger
|
|
|
|
if exception_handler:
|
|
self._set_exception_handler(exception_handler)
|
|
if interruptable:
|
|
signal.signal(signal.SIGINT,
|
|
(lambda a, b: self._keyboard_interrupt()))
|
|
self._setup()
|
|
|
|
def _setup(self):
|
|
# An exception is raised when direct mode is true and no callback is set.
|
|
if self._direct_mode:
|
|
if self._callback is None:
|
|
raise exceptions.IllegalFunctionCall(
|
|
"When direct_mode=True, callback parameter is required.")
|
|
else:
|
|
# Create a default buffer if `direct_mode` is False and buffer is not set.
|
|
if self._buffer is None:
|
|
self._buffer = Buffer(maxsize=20)
|
|
# Create a loop task to call callback if the `callback` param is specified.
|
|
if self._callback is None:
|
|
pass
|
|
else:
|
|
# Create a loop task to call callback if the `callback` param is specified.
|
|
loop = asyncio.get_event_loop()
|
|
loop.create_task(self._callback_loop(self._callback))
|
|
# Start a loop task for _listen()
|
|
loop = asyncio.get_event_loop()
|
|
self.listen_task = loop.create_task(self._startlisten())
|
|
# Register add_done_callback
|
|
if self._done_callback is None:
|
|
self.listen_task.add_done_callback(self._finish)
|
|
else:
|
|
self.listen_task.add_done_callback(self._done_callback)
|
|
|
|
async def _startlisten(self):
|
|
"""Fetch first continuation parameter,
|
|
create and start _listen loop.
|
|
"""
|
|
initial_continuation = liveparam.getparam(self._video_id, 3)
|
|
await self._listen(initial_continuation)
|
|
|
|
async def _listen(self, continuation):
|
|
''' Fetch chat data and store them into buffer,
|
|
get next continuaiton parameter and loop.
|
|
|
|
Parameter
|
|
---------
|
|
continuation : str
|
|
parameter for next chat data
|
|
'''
|
|
try:
|
|
async with httpx.AsyncClient(http2=True) as client:
|
|
while(continuation and self._is_alive):
|
|
continuation = await self._check_pause(continuation)
|
|
contents = await self._get_contents(continuation, client, headers)
|
|
metadata, chatdata = self._parser.parse(contents)
|
|
timeout = metadata['timeoutMs'] / 1000
|
|
chat_component = {
|
|
"video_id": self._video_id,
|
|
"timeout": timeout,
|
|
"chatdata": chatdata
|
|
}
|
|
time_mark = time.time()
|
|
if self._direct_mode:
|
|
processed_chat = self.processor.process(
|
|
[chat_component])
|
|
if isinstance(processed_chat, tuple):
|
|
await self._callback(*processed_chat)
|
|
else:
|
|
await self._callback(processed_chat)
|
|
else:
|
|
await self._buffer.put(chat_component)
|
|
diff_time = timeout - (time.time() - time_mark)
|
|
await asyncio.sleep(diff_time)
|
|
continuation = metadata.get('continuation')
|
|
self._last_offset_ms = metadata.get('last_offset_ms', 0)
|
|
except exceptions.ChatParseException as e:
|
|
self._logger.debug(f"[{self._video_id}]{str(e)}")
|
|
raise
|
|
except Exception:
|
|
self._logger.error(f"{traceback.format_exc(limit=-1)}")
|
|
raise
|
|
|
|
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
|
|
|
|
async def _check_pause(self, continuation):
|
|
if self._pauser.empty():
|
|
'''pause'''
|
|
await self._pauser.get()
|
|
'''resume:
|
|
prohibit from blocking by putting None into _pauser.
|
|
'''
|
|
self._pauser.put_nowait(None)
|
|
if not self._is_replay:
|
|
continuation = liveparam.getparam(
|
|
self._video_id, 3, self._topchat_only)
|
|
return continuation
|
|
|
|
async def _get_contents(self, continuation, client, headers):
|
|
'''Get 'continuationContents' from livechat json.
|
|
If contents is None at first fetching,
|
|
try to fetch archive chat data.
|
|
|
|
Return:
|
|
-------
|
|
'continuationContents' which includes metadata & chatdata.
|
|
'''
|
|
livechat_json = await self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
|
|
contents, dat = self._parser.get_contents(livechat_json)
|
|
if self._dat == '' and dat:
|
|
self._dat = dat
|
|
if self._first_fetch:
|
|
if contents is None or self._is_replay:
|
|
'''Try to fetch archive chat data.'''
|
|
self._parser.is_replay = True
|
|
self._fetch_url = config._smr
|
|
channelid = await util.get_channelid_async(client, self._video_id)
|
|
continuation = arcparam.getparam(
|
|
self._video_id, self.seektime, self._topchat_only, channelid)
|
|
livechat_json = (await self._get_livechat_json(
|
|
continuation, client, replay=True, offset_ms=self.seektime * 1000))
|
|
reload_continuation = self._parser.reload_continuation(
|
|
self._parser.get_contents(livechat_json)[0])
|
|
if reload_continuation:
|
|
livechat_json = (await self._get_livechat_json(
|
|
reload_continuation, client, headers))
|
|
contents, _ = self._parser.get_contents(livechat_json)
|
|
self._is_replay = True
|
|
self._first_fetch = False
|
|
return contents
|
|
|
|
async def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
|
|
'''
|
|
Get json which includes chat data.
|
|
'''
|
|
# continuation = urllib.parse.quote(continuation)
|
|
livechat_json = None
|
|
if offset_ms < 0:
|
|
offset_ms = 0
|
|
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
|
|
for _ in range(MAX_RETRY + 1):
|
|
try:
|
|
resp = await client.post(self._fetch_url, json=param)
|
|
livechat_json = resp.json()
|
|
break
|
|
except (json.JSONDecodeError, httpx.HTTPError):
|
|
await asyncio.sleep(2)
|
|
continue
|
|
else:
|
|
self._logger.error(f"[{self._video_id}]"
|
|
f"Exceeded retry count.")
|
|
raise exceptions.RetryExceedMaxCount()
|
|
return livechat_json
|
|
|
|
async def _callback_loop(self, callback):
|
|
""" If a callback is specified in the constructor,
|
|
it throws chat data at regular intervals to the
|
|
function specified in the callback in the backgroun
|
|
|
|
Parameter
|
|
---------
|
|
callback : func
|
|
function to which the processed chat data is passed.
|
|
"""
|
|
while self.is_alive():
|
|
items = await self._buffer.get()
|
|
processed_chat = self.processor.process(items)
|
|
if isinstance(processed_chat, tuple):
|
|
await self._callback(*processed_chat)
|
|
else:
|
|
await self._callback(processed_chat)
|
|
|
|
async def get(self):
|
|
"""
|
|
Retrieves data from the buffer,
|
|
throws it to the processor,
|
|
and returns the processed chat data.
|
|
|
|
Returns
|
|
: Chat data processed by the Processor
|
|
"""
|
|
if self._callback is None:
|
|
if self.is_alive():
|
|
items = await self._buffer.get()
|
|
return self.processor.process(items)
|
|
else:
|
|
return []
|
|
raise exceptions.IllegalFunctionCall(
|
|
"Callback parameter is already set, so get() cannot be performed.")
|
|
|
|
def is_replay(self):
|
|
return self._is_replay
|
|
|
|
def pause(self):
|
|
if self._callback is None:
|
|
return
|
|
if not self._pauser.empty():
|
|
self._pauser.get_nowait()
|
|
|
|
def resume(self):
|
|
if self._callback is None:
|
|
return
|
|
if self._pauser.empty():
|
|
self._pauser.put_nowait(None)
|
|
|
|
def is_alive(self):
|
|
return self._is_alive
|
|
|
|
def _finish(self, sender):
|
|
'''Called when the _listen() task finished.'''
|
|
try:
|
|
self._task_finished()
|
|
except CancelledError:
|
|
self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
|
|
|
|
def terminate(self):
|
|
if self._pauser.empty():
|
|
self._pauser.put_nowait(None)
|
|
self._is_alive = False
|
|
self._buffer.put_nowait({})
|
|
self.processor.finalize()
|
|
|
|
def _keyboard_interrupt(self):
|
|
self.exception = exceptions.ChatDataFinished()
|
|
self.terminate()
|
|
|
|
def _task_finished(self):
|
|
if self.is_alive():
|
|
self.terminate()
|
|
try:
|
|
self.listen_task.result()
|
|
except Exception as e:
|
|
self.exception = e
|
|
if not isinstance(e, exceptions.ChatParseException):
|
|
self._logger.error(f'Internal exception - {type(e)}{str(e)}')
|
|
self._logger.info(f'[{self._video_id}] finished.')
|
|
|
|
def raise_for_status(self):
|
|
if self.exception is not None:
|
|
raise self.exception
|
|
|
|
@classmethod
|
|
def _set_exception_handler(cls, handler):
|
|
loop = asyncio.get_event_loop()
|
|
loop.set_exception_handler(handler)
|