This commit is contained in:
taizan-hokuto
2020-05-31 19:45:01 +09:00
parent 6eb848f1c9
commit 141dbcd2da
4 changed files with 120 additions and 126 deletions

View File

@@ -1,7 +1,6 @@
import aiohttp, asyncio import aiohttp
import datetime import asyncio
import json import json
import random
import signal import signal
import time import time
import traceback import traceback
@@ -12,8 +11,8 @@ from asyncio import Queue
from .buffer import Buffer from .buffer import Buffer
from ..parser.live import Parser from ..parser.live import Parser
from .. import config from .. import config
from ..exceptions import ChatParseException,IllegalFunctionCall from ..exceptions import ChatParseException, IllegalFunctionCall
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
@@ -58,14 +57,14 @@ class LiveChatAsync:
Trueの場合、bufferを使わずにcallbackを呼ぶ。 Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須 Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる (設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。 強制的にアーカイブ済みチャットを取得する。
topchat_only : bool topchat_only : bool
Trueの場合、上位チャットのみ取得する。 Trueの場合、上位チャットのみ取得する。
Attributes Attributes
--------- ---------
_is_alive : bool _is_alive : bool
@@ -75,19 +74,19 @@ class LiveChatAsync:
_setup_finished = False _setup_finished = False
def __init__(self, video_id, def __init__(self, video_id,
seektime = 0, seektime=-1,
processor = DefaultProcessor(), processor=DefaultProcessor(),
buffer = None, buffer=None,
interruptable = True, interruptable=True,
callback = None, callback=None,
done_callback = None, done_callback=None,
exception_handler = None, exception_handler=None,
direct_mode = False, direct_mode=False,
force_replay = False, force_replay=False,
topchat_only = False, topchat_only=False,
logger = config.logger(__name__), logger=config.logger(__name__),
): ):
self.video_id = video_id self.video_id = video_id
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)
@@ -98,9 +97,9 @@ class LiveChatAsync:
self._done_callback = done_callback self._done_callback = done_callback
self._exception_handler = exception_handler self._exception_handler = exception_handler
self._direct_mode = direct_mode self._direct_mode = direct_mode
self._is_alive = True self._is_alive = True
self._is_replay = force_replay self._is_replay = force_replay
self._parser = Parser(is_replay = self._is_replay) self._parser = Parser(is_replay=self._is_replay)
self._pauser = Queue() self._pauser = Queue()
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
self._setup() self._setup()
@@ -115,32 +114,32 @@ class LiveChatAsync:
if exception_handler: if exception_handler:
self._set_exception_handler(exception_handler) self._set_exception_handler(exception_handler)
if interruptable: if interruptable:
signal.signal(signal.SIGINT, signal.signal(signal.SIGINT,
(lambda a, b:asyncio.create_task( (lambda a, b: asyncio.create_task(
LiveChatAsync.shutdown(None,signal.SIGINT,b)) LiveChatAsync.shutdown(None, signal.SIGINT, b))
)) ))
def _setup(self): def _setup(self):
#direct modeがTrueでcallback未設定の場合例外発生。 # direct modeがTrueでcallback未設定の場合例外発生。
if self._direct_mode: if self._direct_mode:
if self._callback is None: if self._callback is None:
raise IllegalFunctionCall( raise IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.") "When direct_mode=True, callback parameter is required.")
else: else:
#direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 # direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
if self._buffer is None: if self._buffer is None:
self._buffer = Buffer(maxsize = 20) self._buffer = Buffer(maxsize=20)
#callbackが指定されている場合はcallbackを呼ぶループタスクを作成 # callbackが指定されている場合はcallbackを呼ぶループタスクを作成
if self._callback is None: if self._callback is None:
pass pass
else: else:
#callbackを呼ぶループタスクの開始 # callbackを呼ぶループタスクの開始
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.create_task(self._callback_loop(self._callback)) loop.create_task(self._callback_loop(self._callback))
#_listenループタスクの開始 # _listenループタスクの開始
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
listen_task = loop.create_task(self._startlisten()) listen_task = loop.create_task(self._startlisten())
#add_done_callbackの登録 # add_done_callbackの登録
if self._done_callback is None: if self._done_callback is None:
listen_task.add_done_callback(self.finish) listen_task.add_done_callback(self.finish)
else: else:
@@ -150,7 +149,7 @@ class LiveChatAsync:
"""Fetch first continuation parameter, """Fetch first continuation parameter,
create and start _listen loop. create and start _listen loop.
""" """
initial_continuation = liveparam.getparam(self.video_id,3) initial_continuation = liveparam.getparam(self.video_id, 3)
await self._listen(initial_continuation) await self._listen(initial_continuation)
async def _listen(self, continuation): async def _listen(self, continuation):
@@ -168,33 +167,33 @@ class LiveChatAsync:
continuation = await self._check_pause(continuation) continuation = await self._check_pause(continuation)
contents = await self._get_contents( contents = await self._get_contents(
continuation, session, headers) continuation, session, headers)
metadata, chatdata = self._parser.parse(contents) metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs']/1000 timeout = metadata['timeoutMs']/1000
chat_component = { chat_component = {
"video_id" : self.video_id, "video_id": self.video_id,
"timeout" : timeout, "timeout": timeout,
"chatdata" : chatdata "chatdata": chatdata
} }
time_mark =time.time() time_mark = time.time()
if self._direct_mode: if self._direct_mode:
processed_chat = self.processor.process([chat_component]) processed_chat = self.processor.process([chat_component])
if isinstance(processed_chat,tuple): if isinstance(processed_chat, tuple):
await self._callback(*processed_chat) await self._callback(*processed_chat)
else: else:
await self._callback(processed_chat) await self._callback(processed_chat)
else: else:
await self._buffer.put(chat_component) await self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark) diff_time = timeout - (time.time()-time_mark)
await asyncio.sleep(diff_time) await asyncio.sleep(diff_time)
continuation = metadata.get('continuation') continuation = metadata.get('continuation')
except ChatParseException as e: except ChatParseException as e:
self._logger.debug(f"[{self.video_id}]{str(e)}") self._logger.debug(f"[{self.video_id}]{str(e)}")
return return
except (TypeError , json.JSONDecodeError) : except (TypeError, json.JSONDecodeError):
self._logger.error(f"{traceback.format_exc(limit = -1)}") self._logger.error(f"{traceback.format_exc(limit = -1)}")
return return
self._logger.debug(f"[{self.video_id}]finished fetching chat.") self._logger.debug(f"[{self.video_id}]finished fetching chat.")
async def _check_pause(self, continuation): async def _check_pause(self, continuation):
@@ -212,16 +211,14 @@ class LiveChatAsync:
async def _get_contents(self, continuation, session, headers): async def _get_contents(self, continuation, session, headers):
'''Get 'continuationContents' from livechat json. '''Get 'continuationContents' from livechat json.
If contents is None at first fetching, If contents is None at first fetching,
try to fetch archive chat data. try to fetch archive chat data.
Return: Return:
------- -------
'continuationContents' which includes metadata & chatdata. 'continuationContents' which includes metadata & chatdata.
''' '''
livechat_json = (await livechat_json = await self._get_livechat_json(continuation, session, headers)
self._get_livechat_json(continuation, session, headers)
)
contents = self._parser.get_contents(livechat_json) contents = self._parser.get_contents(livechat_json)
if self._first_fetch: if self._first_fetch:
if contents is None or self._is_replay: if contents is None or self._is_replay:
@@ -230,12 +227,12 @@ class LiveChatAsync:
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
continuation = arcparam.getparam( continuation = arcparam.getparam(
self.video_id, self.seektime, self._topchat_only) self.video_id, self.seektime, self._topchat_only)
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
continuation, session, headers)) continuation, session, headers))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)) self._parser.get_contents(livechat_json))
if reload_continuation: if reload_continuation:
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
reload_continuation, session, headers)) reload_continuation, session, headers))
contents = self._parser.get_contents(livechat_json) contents = self._parser.get_contents(livechat_json)
self._is_replay = True self._is_replay = True
@@ -249,26 +246,26 @@ class LiveChatAsync:
continuation = urllib.parse.quote(continuation) continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
status_code = 0 status_code = 0
url =f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
async with session.get(url ,headers = headers) as resp: async with session.get(url, headers=headers) as resp:
try: try:
text = await resp.text() text = await resp.text()
livechat_json = json.loads(text) livechat_json = json.loads(text)
break break
except (ClientConnectorError,json.JSONDecodeError) : except (ClientConnectorError, json.JSONDecodeError):
await asyncio.sleep(1) await asyncio.sleep(1)
continue continue
else: else:
self._logger.error(f"[{self.video_id}]" self._logger.error(f"[{self.video_id}]"
f"Exceeded retry count. status_code={status_code}") f"Exceeded retry count. status_code={status_code}")
return None return None
return livechat_json return livechat_json
async def _callback_loop(self,callback): async def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで """ コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。 callbackに指定された関数に一定間隔でチャットデータを投げる。
Parameter Parameter
--------- ---------
callback : func callback : func
@@ -285,13 +282,13 @@ class LiveChatAsync:
async def get(self): async def get(self):
""" bufferからデータを取り出し、processorに投げ、 """ bufferからデータを取り出し、processorに投げ、
加工済みのチャットデータを返す。 加工済みのチャットデータを返す。
Returns Returns
: Processorによって加工されたチャットデータ : Processorによって加工されたチャットデータ
""" """
if self._callback is None: if self._callback is None:
items = await self._buffer.get() items = await self._buffer.get()
return self.processor.process(items) return self.processor.process(items)
raise IllegalFunctionCall( raise IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。") "既にcallbackを登録済みのため、get()は実行できません。")
@@ -309,13 +306,13 @@ class LiveChatAsync:
return return
if self._pauser.empty(): if self._pauser.empty():
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
def is_alive(self): def is_alive(self):
return self._is_alive return self._is_alive
def finish(self,sender): def finish(self, sender):
'''Listener終了時のコールバック''' '''Listener終了時のコールバック'''
try: try:
self.terminate() self.terminate()
except CancelledError: except CancelledError:
self._logger.debug(f'[{self.video_id}]cancelled:{sender}') self._logger.debug(f'[{self.video_id}]cancelled:{sender}')
@@ -325,24 +322,24 @@ class LiveChatAsync:
Listenerを終了する。 Listenerを終了する。
''' '''
self._is_alive = False self._is_alive = False
if self._direct_mode == False: if self._direct_mode is False:
#bufferにダミーオブジェクトを入れてis_alive()を判定させる # bufferにダミーオブジェクトを入れてis_alive()を判定させる
self._buffer.put_nowait({'chatdata':'','timeout':0}) self._buffer.put_nowait({'chatdata': '', 'timeout': 0})
self._logger.info(f'[{self.video_id}]finished.') self._logger.info(f'[{self.video_id}]finished.')
@classmethod @classmethod
def _set_exception_handler(cls, handler): def _set_exception_handler(cls, handler):
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.set_exception_handler(handler) loop.set_exception_handler(handler)
@classmethod @classmethod
async def shutdown(cls, event, sig = None, handler=None): async def shutdown(cls, event, sig=None, handler=None):
cls._logger.debug("shutdown...") cls._logger.debug("shutdown...")
tasks = [t for t in asyncio.all_tasks() if t is not tasks = [t for t in asyncio.all_tasks() if t is not
asyncio.current_task()] asyncio.current_task()]
[task.cancel() for task in tasks] [task.cancel() for task in tasks]
cls._logger.debug(f"complete remaining tasks...") cls._logger.debug("complete remaining tasks...")
await asyncio.gather(*tasks,return_exceptions=True) await asyncio.gather(*tasks, return_exceptions=True)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.stop() loop.stop()

View File

@@ -1,7 +1,5 @@
import requests import requests
import datetime
import json import json
import random
import signal import signal
import time import time
import traceback import traceback
@@ -53,9 +51,9 @@ class LiveChat:
direct_mode : bool direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。 Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須 Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる (設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。 強制的にアーカイブ済みチャットを取得する。
@@ -74,7 +72,7 @@ class LiveChat:
_setup_finished = False _setup_finished = False
def __init__(self, video_id, def __init__(self, video_id,
seektime=0, seektime=-1,
processor=DefaultProcessor(), processor=DefaultProcessor(),
buffer=None, buffer=None,
interruptable=True, interruptable=True,
@@ -181,7 +179,7 @@ class LiveChat:
self._logger.debug(f"[{self.video_id}]{str(e)}") self._logger.debug(f"[{self.video_id}]{str(e)}")
return return
except (TypeError, json.JSONDecodeError): except (TypeError, json.JSONDecodeError):
self._logger.error(f"{traceback.format_exc(limit = -1)}") self._logger.error(f"{traceback.format_exc(limit=-1)}")
return return
self._logger.debug(f"[{self.video_id}]finished fetching chat.") self._logger.debug(f"[{self.video_id}]finished fetching chat.")
@@ -200,7 +198,7 @@ class LiveChat:
def _get_contents(self, continuation, session, headers): def _get_contents(self, continuation, session, headers):
'''Get 'continuationContents' from livechat json. '''Get 'continuationContents' from livechat json.
If contents is None at first fetching, If contents is None at first fetching,
try to fetch archive chat data. try to fetch archive chat data.
Return: Return:
@@ -255,7 +253,7 @@ class LiveChat:
def _callback_loop(self, callback): def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで """ コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。 callbackに指定された関数に一定間隔でチャットデータを投げる。
Parameter Parameter
--------- ---------

View File

@@ -1,7 +1,5 @@
from base64 import urlsafe_b64encode as b64enc from base64 import urlsafe_b64encode as b64enc
from functools import reduce from functools import reduce
import math
import random
import urllib.parse import urllib.parse
''' '''
@@ -12,6 +10,7 @@ Author: taizan-hokuto (2019) @taizan205
ver 0.0.1 2019.10.05 ver 0.0.1 2019.10.05
''' '''
def _gen_vid_long(video_id): def _gen_vid_long(video_id):
"""generate video_id parameter. """generate video_id parameter.
Parameter Parameter
@@ -23,7 +22,7 @@ def _gen_vid_long(video_id):
byte[] : base64 encoded video_id parameter. byte[] : base64 encoded video_id parameter.
""" """
header_magic = b'\x0A\x0F\x1A\x0D\x0A' header_magic = b'\x0A\x0F\x1A\x0D\x0A'
header_id = video_id.encode() header_id = video_id.encode()
header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B' header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B'
header_terminator = b'\x20\x01' header_terminator = b'\x20\x01'
@@ -67,15 +66,17 @@ def _gen_vid(video_id):
def _nval(val): def _nval(val):
"""convert value to byte array""" """convert value to byte array"""
if val<0: raise ValueError if val < 0:
raise ValueError
buf = b'' buf = b''
while val >> 7: while val >> 7:
m = val & 0xFF | 0x80 m = val & 0xFF | 0x80
buf += m.to_bytes(1,'big') buf += m.to_bytes(1, 'big')
val >>= 7 val >>= 7
buf += val.to_bytes(1,'big') buf += val.to_bytes(1, 'big')
return buf return buf
def _build(video_id, seektime, topchat_only): def _build(video_id, seektime, topchat_only):
switch_01 = b'\x04' if topchat_only else b'\x01' switch_01 = b'\x04' if topchat_only else b'\x01'
if seektime < 0: if seektime < 0:
@@ -83,11 +84,9 @@ def _build(video_id, seektime, topchat_only):
if seektime == 0: if seektime == 0:
times = b'' times = b''
else: else:
times =_nval(int(seektime*1000)) times = _nval(int(seektime*1000))
if seektime > 0: if seektime > 0:
_len_time = ( b'\x5A' _len_time = b'\x5A' + (len(times)+1).to_bytes(1, 'big') + b'\x10'
+ (len(times)+1).to_bytes(1,'big')
+ b'\x10')
else: else:
_len_time = b'' _len_time = b''
@@ -112,15 +111,16 @@ def _build(video_id, seektime, topchat_only):
] ]
body = reduce(lambda x, y: x+y, body) body = reduce(lambda x, y: x+y, body)
return urllib.parse.quote( return urllib.parse.quote(
b64enc( header_magic + b64enc(header_magic +
_nval(len(body)) + _nval(len(body)) +
body body
).decode() ).decode()
) )
def getparam(video_id, seektime = 0.0, topchat_only = False):
def getparam(video_id, seektime=0.0, topchat_only=False):
''' '''
Parameter Parameter
--------- ---------

View File

@@ -4,27 +4,26 @@ pytchat.parser.live
Parser of live chat JSON. Parser of live chat JSON.
""" """
import json from .. exceptions import (
from .. exceptions import ( ResponseContextError,
ResponseContextError, NoContentsException,
NoContentsException,
NoContinuationsException, NoContinuationsException,
ChatParseException ) ChatParseException)
class Parser: class Parser:
__slots__ = ['is_replay'] __slots__ = ['is_replay']
def __init__(self, is_replay): def __init__(self, is_replay):
self.is_replay = is_replay self.is_replay = is_replay
def get_contents(self, jsn): def get_contents(self, jsn):
if jsn is None: if jsn is None:
raise ChatParseException('Called with none JSON object.') raise ChatParseException('Called with none JSON object.')
if jsn['response']['responseContext'].get('errors'): if jsn['response']['responseContext'].get('errors'):
raise ResponseContextError('The video_id would be wrong,' raise ResponseContextError('The video_id would be wrong, or video is deleted or private.')
'or video is deleted or private.') contents = jsn['response'].get('continuationContents')
contents=jsn['response'].get('continuationContents')
return contents return contents
def parse(self, contents): def parse(self, contents):
@@ -40,7 +39,7 @@ class Parser:
+ metadata : dict + metadata : dict
+ timeout + timeout
+ video_id + video_id
+ continuation + continuation
+ chatdata : List[dict] + chatdata : List[dict]
""" """
@@ -51,9 +50,9 @@ class Parser:
cont = contents['liveChatContinuation']['continuations'][0] cont = contents['liveChatContinuation']['continuations'][0]
if cont is None: if cont is None:
raise NoContinuationsException('No Continuation') raise NoContinuationsException('No Continuation')
metadata = (cont.get('invalidationContinuationData') or metadata = (cont.get('invalidationContinuationData') or
cont.get('timedContinuationData') or cont.get('timedContinuationData') or
cont.get('reloadContinuationData') or cont.get('reloadContinuationData') or
cont.get('liveChatReplayContinuationData') cont.get('liveChatReplayContinuationData')
) )
if metadata is None: if metadata is None:
@@ -68,30 +67,30 @@ class Parser:
def reload_continuation(self, contents): def reload_continuation(self, contents):
""" """
When `seektime = 0` or seektime is abbreviated , When `seektime = 0` or seektime is abbreviated ,
check if fetched chat json has no chat data. check if fetched chat json has no chat data.
If so, try to fetch playerSeekContinuationData. If so, try to fetch playerSeekContinuationData.
This function must be run only first fetching. This function must be run only first fetching.
""" """
cont = contents['liveChatContinuation']['continuations'][0] cont = contents['liveChatContinuation']['continuations'][0]
if cont.get("liveChatReplayContinuationData"): if cont.get("liveChatReplayContinuationData"):
#chat data exist. # chat data exist.
return None return None
#chat data do not exist, get playerSeekContinuationData. # chat data do not exist, get playerSeekContinuationData.
init_cont = cont.get("playerSeekContinuationData") init_cont = cont.get("playerSeekContinuationData")
if init_cont: if init_cont:
return init_cont.get("continuation") return init_cont.get("continuation")
raise ChatParseException('Finished chat data') raise ChatParseException('Finished chat data')
def _create_data(self, metadata, contents): def _create_data(self, metadata, contents):
actions = contents['liveChatContinuation'].get('actions') actions = contents['liveChatContinuation'].get('actions')
if self.is_replay: if self.is_replay:
interval = self._get_interval(actions) interval = self._get_interval(actions)
metadata.setdefault("timeoutMs",interval) metadata.setdefault("timeoutMs", interval)
"""Archived chat has different structures than live chat, """Archived chat has different structures than live chat,
so make it the same format.""" so make it the same format."""
chatdata = [action["replayChatItemAction"]["actions"][0] chatdata = [action["replayChatItemAction"]["actions"][0]
for action in actions] for action in actions]
else: else:
metadata.setdefault('timeoutMs', 10000) metadata.setdefault('timeoutMs', 10000)
chatdata = actions chatdata = actions
@@ -102,4 +101,4 @@ class Parser:
return 0 return 0
start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"]) start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"]) last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
return (last - start) return (last - start)