This commit is contained in:
taizan-hokuto
2020-05-31 19:45:01 +09:00
parent 6eb848f1c9
commit 141dbcd2da
4 changed files with 120 additions and 126 deletions

View File

@@ -1,7 +1,6 @@
import aiohttp, asyncio
import datetime
import aiohttp
import asyncio
import json
import random
import signal
import time
import traceback
@@ -12,8 +11,8 @@ from asyncio import Queue
from .buffer import Buffer
from ..parser.live import Parser
from .. import config
from ..exceptions import ChatParseException,IllegalFunctionCall
from ..paramgen import liveparam, arcparam
from ..exceptions import ChatParseException, IllegalFunctionCall
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
@@ -58,14 +57,14 @@ class LiveChatAsync:
Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool
force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。
topchat_only : bool
Trueの場合、上位チャットのみ取得する。
Attributes
---------
_is_alive : bool
@@ -75,19 +74,19 @@ class LiveChatAsync:
_setup_finished = False
def __init__(self, video_id,
seektime = 0,
processor = DefaultProcessor(),
buffer = None,
interruptable = True,
callback = None,
done_callback = None,
exception_handler = None,
direct_mode = False,
force_replay = False,
topchat_only = False,
logger = config.logger(__name__),
):
self.video_id = video_id
seektime=-1,
processor=DefaultProcessor(),
buffer=None,
interruptable=True,
callback=None,
done_callback=None,
exception_handler=None,
direct_mode=False,
force_replay=False,
topchat_only=False,
logger=config.logger(__name__),
):
self.video_id = video_id
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)
@@ -98,9 +97,9 @@ class LiveChatAsync:
self._done_callback = done_callback
self._exception_handler = exception_handler
self._direct_mode = direct_mode
self._is_alive = True
self._is_alive = True
self._is_replay = force_replay
self._parser = Parser(is_replay = self._is_replay)
self._parser = Parser(is_replay=self._is_replay)
self._pauser = Queue()
self._pauser.put_nowait(None)
self._setup()
@@ -115,32 +114,32 @@ class LiveChatAsync:
if exception_handler:
self._set_exception_handler(exception_handler)
if interruptable:
signal.signal(signal.SIGINT,
(lambda a, b:asyncio.create_task(
LiveChatAsync.shutdown(None,signal.SIGINT,b))
))
signal.signal(signal.SIGINT,
(lambda a, b: asyncio.create_task(
LiveChatAsync.shutdown(None, signal.SIGINT, b))
))
def _setup(self):
#direct modeがTrueでcallback未設定の場合例外発生。
# direct modeがTrueでcallback未設定の場合例外発生。
if self._direct_mode:
if self._callback is None:
raise IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.")
else:
#direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
if self._buffer is None:
self._buffer = Buffer(maxsize = 20)
#callbackが指定されている場合はcallbackを呼ぶループタスクを作成
self._buffer = Buffer(maxsize=20)
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成
if self._callback is None:
pass
pass
else:
#callbackを呼ぶループタスクの開始
# callbackを呼ぶループタスクの開始
loop = asyncio.get_event_loop()
loop.create_task(self._callback_loop(self._callback))
#_listenループタスクの開始
# _listenループタスクの開始
loop = asyncio.get_event_loop()
listen_task = loop.create_task(self._startlisten())
#add_done_callbackの登録
# add_done_callbackの登録
if self._done_callback is None:
listen_task.add_done_callback(self.finish)
else:
@@ -150,7 +149,7 @@ class LiveChatAsync:
"""Fetch first continuation parameter,
create and start _listen loop.
"""
initial_continuation = liveparam.getparam(self.video_id,3)
initial_continuation = liveparam.getparam(self.video_id, 3)
await self._listen(initial_continuation)
async def _listen(self, continuation):
@@ -168,33 +167,33 @@ class LiveChatAsync:
continuation = await self._check_pause(continuation)
contents = await self._get_contents(
continuation, session, headers)
metadata, chatdata = self._parser.parse(contents)
metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs']/1000
chat_component = {
"video_id" : self.video_id,
"timeout" : timeout,
"chatdata" : chatdata
"video_id": self.video_id,
"timeout": timeout,
"chatdata": chatdata
}
time_mark =time.time()
time_mark = time.time()
if self._direct_mode:
processed_chat = self.processor.process([chat_component])
if isinstance(processed_chat,tuple):
if isinstance(processed_chat, tuple):
await self._callback(*processed_chat)
else:
await self._callback(processed_chat)
else:
await self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark)
await asyncio.sleep(diff_time)
continuation = metadata.get('continuation')
await asyncio.sleep(diff_time)
continuation = metadata.get('continuation')
except ChatParseException as e:
self._logger.debug(f"[{self.video_id}]{str(e)}")
return
except (TypeError , json.JSONDecodeError) :
return
except (TypeError, json.JSONDecodeError):
self._logger.error(f"{traceback.format_exc(limit = -1)}")
return
self._logger.debug(f"[{self.video_id}]finished fetching chat.")
async def _check_pause(self, continuation):
@@ -212,16 +211,14 @@ class LiveChatAsync:
async def _get_contents(self, continuation, session, headers):
'''Get 'continuationContents' from livechat json.
If contents is None at first fetching,
If contents is None at first fetching,
try to fetch archive chat data.
Return:
-------
'continuationContents' which includes metadata & chatdata.
'''
livechat_json = (await
self._get_livechat_json(continuation, session, headers)
)
livechat_json = await self._get_livechat_json(continuation, session, headers)
contents = self._parser.get_contents(livechat_json)
if self._first_fetch:
if contents is None or self._is_replay:
@@ -230,12 +227,12 @@ class LiveChatAsync:
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
continuation = arcparam.getparam(
self.video_id, self.seektime, self._topchat_only)
livechat_json = (await self._get_livechat_json(
continuation, session, headers))
livechat_json = (await self._get_livechat_json(
continuation, session, headers))
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json))
if reload_continuation:
livechat_json = (await self._get_livechat_json(
livechat_json = (await self._get_livechat_json(
reload_continuation, session, headers))
contents = self._parser.get_contents(livechat_json)
self._is_replay = True
@@ -249,26 +246,26 @@ class LiveChatAsync:
continuation = urllib.parse.quote(continuation)
livechat_json = None
status_code = 0
url =f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1):
async with session.get(url ,headers = headers) as resp:
async with session.get(url, headers=headers) as resp:
try:
text = await resp.text()
livechat_json = json.loads(text)
break
except (ClientConnectorError,json.JSONDecodeError) :
except (ClientConnectorError, json.JSONDecodeError):
await asyncio.sleep(1)
continue
else:
self._logger.error(f"[{self.video_id}]"
f"Exceeded retry count. status_code={status_code}")
f"Exceeded retry count. status_code={status_code}")
return None
return livechat_json
async def _callback_loop(self,callback):
async def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。
callbackに指定された関数に一定間隔でチャットデータを投げる。
Parameter
---------
callback : func
@@ -285,13 +282,13 @@ class LiveChatAsync:
async def get(self):
""" bufferからデータを取り出し、processorに投げ、
加工済みのチャットデータを返す。
Returns
: Processorによって加工されたチャットデータ
"""
if self._callback is None:
items = await self._buffer.get()
return self.processor.process(items)
return self.processor.process(items)
raise IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。")
@@ -309,13 +306,13 @@ class LiveChatAsync:
return
if self._pauser.empty():
self._pauser.put_nowait(None)
def is_alive(self):
return self._is_alive
def finish(self,sender):
def finish(self, sender):
'''Listener終了時のコールバック'''
try:
try:
self.terminate()
except CancelledError:
self._logger.debug(f'[{self.video_id}]cancelled:{sender}')
@@ -325,24 +322,24 @@ class LiveChatAsync:
Listenerを終了する。
'''
self._is_alive = False
if self._direct_mode == False:
#bufferにダミーオブジェクトを入れてis_alive()を判定させる
self._buffer.put_nowait({'chatdata':'','timeout':0})
if self._direct_mode is False:
# bufferにダミーオブジェクトを入れてis_alive()を判定させる
self._buffer.put_nowait({'chatdata': '', 'timeout': 0})
self._logger.info(f'[{self.video_id}]finished.')
@classmethod
def _set_exception_handler(cls, handler):
loop = asyncio.get_event_loop()
loop.set_exception_handler(handler)
@classmethod
async def shutdown(cls, event, sig = None, handler=None):
async def shutdown(cls, event, sig=None, handler=None):
cls._logger.debug("shutdown...")
tasks = [t for t in asyncio.all_tasks() if t is not
asyncio.current_task()]
asyncio.current_task()]
[task.cancel() for task in tasks]
cls._logger.debug(f"complete remaining tasks...")
await asyncio.gather(*tasks,return_exceptions=True)
cls._logger.debug("complete remaining tasks...")
await asyncio.gather(*tasks, return_exceptions=True)
loop = asyncio.get_event_loop()
loop.stop()
loop.stop()

View File

@@ -1,7 +1,5 @@
import requests
import datetime
import json
import random
import signal
import time
import traceback
@@ -53,9 +51,9 @@ class LiveChat:
direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる
(設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool
force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。
@@ -74,7 +72,7 @@ class LiveChat:
_setup_finished = False
def __init__(self, video_id,
seektime=0,
seektime=-1,
processor=DefaultProcessor(),
buffer=None,
interruptable=True,
@@ -181,7 +179,7 @@ class LiveChat:
self._logger.debug(f"[{self.video_id}]{str(e)}")
return
except (TypeError, json.JSONDecodeError):
self._logger.error(f"{traceback.format_exc(limit = -1)}")
self._logger.error(f"{traceback.format_exc(limit=-1)}")
return
self._logger.debug(f"[{self.video_id}]finished fetching chat.")
@@ -200,7 +198,7 @@ class LiveChat:
def _get_contents(self, continuation, session, headers):
'''Get 'continuationContents' from livechat json.
If contents is None at first fetching,
If contents is None at first fetching,
try to fetch archive chat data.
Return:
@@ -255,7 +253,7 @@ class LiveChat:
def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。
callbackに指定された関数に一定間隔でチャットデータを投げる。
Parameter
---------

View File

@@ -1,7 +1,5 @@
from base64 import urlsafe_b64encode as b64enc
from functools import reduce
import math
import random
import urllib.parse
'''
@@ -12,6 +10,7 @@ Author: taizan-hokuto (2019) @taizan205
ver 0.0.1 2019.10.05
'''
def _gen_vid_long(video_id):
"""generate video_id parameter.
Parameter
@@ -23,7 +22,7 @@ def _gen_vid_long(video_id):
byte[] : base64 encoded video_id parameter.
"""
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
header_id = video_id.encode()
header_id = video_id.encode()
header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B'
header_terminator = b'\x20\x01'
@@ -67,15 +66,17 @@ def _gen_vid(video_id):
def _nval(val):
"""convert value to byte array"""
if val<0: raise ValueError
if val < 0:
raise ValueError
buf = b''
while val >> 7:
m = val & 0xFF | 0x80
buf += m.to_bytes(1,'big')
buf += m.to_bytes(1, 'big')
val >>= 7
buf += val.to_bytes(1,'big')
buf += val.to_bytes(1, 'big')
return buf
def _build(video_id, seektime, topchat_only):
switch_01 = b'\x04' if topchat_only else b'\x01'
if seektime < 0:
@@ -83,11 +84,9 @@ def _build(video_id, seektime, topchat_only):
if seektime == 0:
times = b''
else:
times =_nval(int(seektime*1000))
times = _nval(int(seektime*1000))
if seektime > 0:
_len_time = ( b'\x5A'
+ (len(times)+1).to_bytes(1,'big')
+ b'\x10')
_len_time = b'\x5A' + (len(times)+1).to_bytes(1, 'big') + b'\x10'
else:
_len_time = b''
@@ -112,15 +111,16 @@ def _build(video_id, seektime, topchat_only):
]
body = reduce(lambda x, y: x+y, body)
return urllib.parse.quote(
b64enc( header_magic +
return urllib.parse.quote(
b64enc(header_magic +
_nval(len(body)) +
body
).decode()
)
def getparam(video_id, seektime = 0.0, topchat_only = False):
def getparam(video_id, seektime=0.0, topchat_only=False):
'''
Parameter
---------

View File

@@ -4,27 +4,26 @@ pytchat.parser.live
Parser of live chat JSON.
"""
import json
from .. exceptions import (
ResponseContextError,
NoContentsException,
from .. exceptions import (
ResponseContextError,
NoContentsException,
NoContinuationsException,
ChatParseException )
ChatParseException)
class Parser:
__slots__ = ['is_replay']
def __init__(self, is_replay):
def __init__(self, is_replay):
self.is_replay = is_replay
def get_contents(self, jsn):
if jsn is None:
if jsn is None:
raise ChatParseException('Called with none JSON object.')
if jsn['response']['responseContext'].get('errors'):
raise ResponseContextError('The video_id would be wrong,'
'or video is deleted or private.')
contents=jsn['response'].get('continuationContents')
raise ResponseContextError('The video_id would be wrong, or video is deleted or private.')
contents = jsn['response'].get('continuationContents')
return contents
def parse(self, contents):
@@ -40,7 +39,7 @@ class Parser:
+ metadata : dict
+ timeout
+ video_id
+ continuation
+ continuation
+ chatdata : List[dict]
"""
@@ -51,9 +50,9 @@ class Parser:
cont = contents['liveChatContinuation']['continuations'][0]
if cont is None:
raise NoContinuationsException('No Continuation')
metadata = (cont.get('invalidationContinuationData') or
cont.get('timedContinuationData') or
cont.get('reloadContinuationData') or
metadata = (cont.get('invalidationContinuationData') or
cont.get('timedContinuationData') or
cont.get('reloadContinuationData') or
cont.get('liveChatReplayContinuationData')
)
if metadata is None:
@@ -68,30 +67,30 @@ class Parser:
def reload_continuation(self, contents):
"""
When `seektime = 0` or seektime is abbreviated ,
check if fetched chat json has no chat data.
If so, try to fetch playerSeekContinuationData.
When `seektime = 0` or seektime is abbreviated ,
check if fetched chat json has no chat data.
If so, try to fetch playerSeekContinuationData.
This function must be run only first fetching.
"""
cont = contents['liveChatContinuation']['continuations'][0]
if cont.get("liveChatReplayContinuationData"):
#chat data exist.
# chat data exist.
return None
#chat data do not exist, get playerSeekContinuationData.
# chat data do not exist, get playerSeekContinuationData.
init_cont = cont.get("playerSeekContinuationData")
if init_cont:
return init_cont.get("continuation")
raise ChatParseException('Finished chat data')
def _create_data(self, metadata, contents):
def _create_data(self, metadata, contents):
actions = contents['liveChatContinuation'].get('actions')
if self.is_replay:
if self.is_replay:
interval = self._get_interval(actions)
metadata.setdefault("timeoutMs",interval)
"""Archived chat has different structures than live chat,
metadata.setdefault("timeoutMs", interval)
"""Archived chat has different structures than live chat,
so make it the same format."""
chatdata = [action["replayChatItemAction"]["actions"][0]
for action in actions]
for action in actions]
else:
metadata.setdefault('timeoutMs', 10000)
chatdata = actions
@@ -102,4 +101,4 @@ class Parser:
return 0
start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
return (last - start)
return (last - start)