Merge branch 'master' into develop

This commit is contained in:
taizan-hokouto
2021-01-09 22:10:28 +09:00
9 changed files with 48 additions and 88 deletions

View File

@@ -9,6 +9,6 @@ _sml = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG
_smr = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG%G#jBOQ!iEb03+@<k(QAU-F)8U=fDGsP557S5F7CiNH7;)D3N77^*B6YU@\\?WfBr0emZX=#^").decode()
def logger(module_name: str, loglevel=None):
def logger(module_name: str, loglevel=logging.DEBUG):
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
return module_logger

View File

@@ -14,7 +14,6 @@ from .. import util
headers = config.headers
MAX_RETRY = 10
class PytchatCore:
'''
@@ -89,7 +88,7 @@ class PytchatCore:
"""Fetch first continuation parameter,
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, 3)
self.continuation = liveparam.getparam(self._video_id, past_sec=3)
def _get_chat_component(self):
@@ -143,8 +142,8 @@ class PytchatCore:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)[0])
if reload_continuation:

View File

@@ -81,7 +81,7 @@ class LiveChatAsync:
direct_mode=False,
force_replay=False,
topchat_only=False,
logger=config.logger(__name__),
logger=config.logger(__name__)
):
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
@@ -223,8 +223,9 @@ class LiveChatAsync:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = config._smr
channelid = await util.get_channelid_async(client, self._video_id)
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, channelid)
livechat_json = (await self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(

View File

@@ -208,7 +208,7 @@ class LiveChat:
-------
'continuationContents' which includes metadata & chat data.
'''
livechat_json = self._get_livechat_json(continuation, client, headers)
livechat_json = self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
@@ -218,7 +218,7 @@ class LiveChat:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
@@ -242,8 +242,8 @@ class LiveChat:
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
try:
resp = client.post(self._fetch_url, json=param)
livechat_json = resp.json()
response = client.post(self._fetch_url, json=param)
livechat_json = response.json()
break
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)

View File

@@ -3,8 +3,7 @@ from base64 import urlsafe_b64encode as b64enc
from urllib.parse import quote
def _header(video_id) -> str:
channel_id = '_' * 24
def _header(video_id, channel_id) -> str:
S1_3 = enc.rs(1, video_id)
S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id)
S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5)
@@ -13,31 +12,26 @@ def _header(video_id) -> str:
return b64enc(header_replay)
def _build(video_id, seektime, topchat_only) -> str:
def _build(video_id, seektime, topchat_only, channel_id) -> str:
chattype = 4 if topchat_only else 1
fetch_before_start = 3
timestamp = 1000
if seektime < 0:
fetch_before_start = 4
elif seektime == 0:
timestamp = 1000
else:
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id))
seektime = 0
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id, channel_id))
timestamp = enc.nm(5, timestamp)
s6 = enc.nm(6, 0)
s7 = enc.nm(7, 0)
s8 = enc.nm(8, 0)
s9 = enc.nm(9, fetch_before_start)
s9 = enc.nm(9, 4)
s10 = enc.rs(10, enc.nm(4, 0))
chattype = enc.rs(14, enc.nm(1, chattype))
chattype = enc.rs(14, enc.nm(1, 4))
s15 = enc.nm(15, 0)
entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15))
continuation = enc.rs(156074452, entity)
return quote(b64enc(continuation).decode())
def getparam(video_id, seektime=-1, topchat_only=False) -> str:
def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str:
'''
Parameter
---------
@@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str:
topchat_only : bool
if True, fetch only 'top chat'
'''
return _build(video_id, seektime, topchat_only)
return _build(video_id, seektime, topchat_only, channel_id)

View File

@@ -28,7 +28,7 @@ class Parser:
def get_contents(self, jsn):
if jsn is None:
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
if jsn.get("responseContext", {}).get("errors"):
raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
contents = jsn.get('continuationContents')

View File

@@ -3,6 +3,7 @@ import httpx
import json
import os
import re
from urllib.parse import quote
from .. import config
from .. exceptions import InvalidVideoIdException
@@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")
YT_VIDEO_ID_LENGTH = 11
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
@@ -92,3 +95,27 @@ def extract_video_id(url_or_id: str) -> str:
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret
def get_channelid(client, video_id):
    '''
    Find the channel id associated with a video by fetching its embed page.

    Parameter
    ---------
    client :
        HTTP client object; its ``get(url, headers=...)`` is called and the
        ``text`` attribute of the returned response is searched.
    video_id : str
        Video id. It is URL-quoted before being placed in the embed URL.

    Returns
    -------
    str
        The text captured by the single group of PATTERN_CHANNEL.

    Raises
    ------
    InvalidVideoIdException
        If PATTERN_CHANNEL does not match anything in the response text.
    '''
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError handling is needed.
    return match.group(1)
async def get_channelid_async(client, video_id):
    '''
    Find the channel id associated with a video by fetching its embed page
    (coroutine variant: the client's ``get`` call is awaited).

    Parameter
    ---------
    client :
        Asynchronous HTTP client object; ``await client.get(url, headers=...)``
        is called and the ``text`` attribute of the response is searched.
    video_id : str
        Video id. It is URL-quoted before being placed in the embed URL.

    Returns
    -------
    str
        The text captured by the single group of PATTERN_CHANNEL.

    Raises
    ------
    InvalidVideoIdException
        If PATTERN_CHANNEL does not match anything in the response text.
    '''
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = await client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError handling is needed.
    return match.group(1)

View File

@@ -1,19 +0,0 @@
import json
import httpx
import pytchat.config as config
from pytchat.paramgen import arcparam
from pytchat.parser.live import Parser
def test_arcparam_0(mocker):
    """Check the parameter built for video id '01234567890' with seektime -1."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    actual = arcparam.getparam("01234567890", -1)
    assert actual == expected
def test_arcparam_1(mocker):
    """Check the parameter built for video id '01234567890' with seektime=100000."""
    expected = "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D"
    actual = arcparam.getparam("01234567890", seektime=100000)
    assert actual == expected
def test_arcparam_3(mocker):
    """Check the parameter built for video id '01234567890' with default arguments."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    actual = arcparam.getparam("01234567890")
    assert actual == expected

View File

@@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock):
assert True
def test_async_replay_stream(httpx_mock: HTTPXMock):
    """Register two canned responses, then check the renderer types found in
    the first chat batch returned by LiveChatAsync.get()."""
    for fixture_path in ('tests/testdata/finished_live.json',
                         'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture_path)

    async def run_checks():
        livechat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
        batches = await livechat.get()
        actions = batches[0]["chatdata"]
        # assert fetching replaychat data
        first_kind = list(actions[0]["addChatItemAction"]["item"].keys())[0]
        assert first_kind == "liveChatTextMessageRenderer"
        paid_kind = list(actions[14]["addChatItemAction"]["item"].keys())[0]
        assert paid_kind == "liveChatPaidMessageRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(run_checks())
    except CancelledError:
        # Cancellation of the loop is tolerated by this test.
        assert True
def test_async_force_replay(httpx_mock: HTTPXMock):
    """Register two canned responses, then check the renderer types found in
    the first chat batch when LiveChatAsync is built with force_replay=True."""
    for fixture_path in ('tests/testdata/test_stream.json',
                         'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture_path)

    async def run_checks():
        livechat = LiveChatAsync(
            video_id='__test_id__',
            processor=DummyProcessor(),
            force_replay=True,
        )
        batches = await livechat.get()
        actions = batches[0]["chatdata"]
        # assert fetching replaychat data
        paid_kind = list(actions[14]["addChatItemAction"]["item"].keys())[0]
        assert paid_kind == "liveChatPaidMessageRenderer"
        # assert not mix livechat data
        third_kind = list(actions[2]["addChatItemAction"]["item"].keys())[0]
        assert third_kind != "liveChatPlaceholderItemRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(run_checks())
    except CancelledError:
        # Cancellation of the loop is tolerated by this test.
        assert True
def test_multithread_live_stream(httpx_mock: HTTPXMock):