From 78fbe97b66221201a308a06fedd4cc14b43d38bc Mon Sep 17 00:00:00 2001 From: taizan-hokouto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sat, 9 Jan 2021 22:09:31 +0900 Subject: [PATCH] Fix process of fetching archived chat --- pytchat/config/__init__.py | 2 +- pytchat/core/pytchat.py | 7 ++--- pytchat/core_async/livechat.py | 5 ++-- pytchat/core_multithread/livechat.py | 8 +++--- pytchat/paramgen/arcparam.py | 24 ++++++---------- pytchat/parser/live.py | 2 +- pytchat/util/__init__.py | 27 ++++++++++++++++++ tests/test_arcparam.py | 19 ------------- tests/test_livechat_2.py | 42 ---------------------------- 9 files changed, 48 insertions(+), 88 deletions(-) delete mode 100644 tests/test_arcparam.py diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index 4f26a1e..e390412 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -9,6 +9,6 @@ _sml = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9%9 str: - channel_id = '_' * 24 +def _header(video_id, channel_id) -> str: S1_3 = enc.rs(1, video_id) S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id) S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5) @@ -13,31 +12,26 @@ def _header(video_id) -> str: return b64enc(header_replay) -def _build(video_id, seektime, topchat_only) -> str: +def _build(video_id, seektime, topchat_only, channel_id) -> str: chattype = 4 if topchat_only else 1 - fetch_before_start = 3 - timestamp = 1000 if seektime < 0: - fetch_before_start = 4 - elif seektime == 0: - timestamp = 1000 - else: - timestamp = int(seektime * 1000000) - header = enc.rs(3, _header(video_id)) + seektime = 0 + timestamp = int(seektime * 1000000) + header = enc.rs(3, _header(video_id, channel_id)) timestamp = enc.nm(5, timestamp) s6 = enc.nm(6, 0) s7 = enc.nm(7, 0) s8 = enc.nm(8, 0) - s9 = enc.nm(9, fetch_before_start) + s9 = enc.nm(9, 4) s10 = enc.rs(10, enc.nm(4, 0)) - chattype = enc.rs(14, enc.nm(1, chattype)) + chattype = enc.rs(14, enc.nm(1, 4)) s15 = enc.nm(15, 0) entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15)) continuation = enc.rs(156074452, entity) return quote(b64enc(continuation).decode()) -def getparam(video_id, seektime=-1, topchat_only=False) -> str: +def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str: ''' Parameter --------- @@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str: topchat_only : bool if True, fetch only 'top chat' ''' - return _build(video_id, seektime, topchat_only) + return _build(video_id, seektime, topchat_only, channel_id) diff --git a/pytchat/parser/live.py b/pytchat/parser/live.py index 567bb19..8bd10fb 100644 --- a/pytchat/parser/live.py +++ b/pytchat/parser/live.py @@ -28,7 +28,7 @@ class Parser: def get_contents(self, jsn): if jsn is None: self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.')) - if jsn.get("error") or jsn.get("responseContext", {}).get("errors"): + if jsn.get("responseContext", {}).get("errors"): raise exceptions.ResponseContextError( 'The video_id would be wrong, or video is deleted or private.') contents = jsn.get('continuationContents') diff --git a/pytchat/util/__init__.py b/pytchat/util/__init__.py index 66cdd96..08baf40 100644 --- a/pytchat/util/__init__.py +++ b/pytchat/util/__init__.py @@ -3,6 +3,7 @@ import httpx import json import os import re +from urllib.parse import quote from .. import config from .. exceptions import InvalidVideoIdException @@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)") +PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"") + YT_VIDEO_ID_LENGTH = 11 CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00")) @@ -92,3 +95,27 @@ def extract_video_id(url_or_id: str) -> str: if ret is None or len(ret) != YT_VIDEO_ID_LENGTH: raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") return ret + + +def get_channelid(client, video_id): + resp = client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers) + match = re.search(PATTERN_CHANNEL, resp.text) + if match is None: + raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.") + try: + ret = match.group(1) + except IndexError: + raise InvalidVideoIdException(f"Invalid video id: {video_id}") + return ret + +async def get_channelid_async(client, video_id): + resp = await client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers) + match = re.search(PATTERN_CHANNEL, resp.text) + if match is None: + # return "" + raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.") + try: + ret = match.group(1) + except IndexError: + raise InvalidVideoIdException(f"Invalid video id: {video_id}") + return ret \ No newline at end of file diff --git a/tests/test_arcparam.py b/tests/test_arcparam.py deleted file mode 100644 index dc9be28..0000000 --- a/tests/test_arcparam.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -import httpx -import pytchat.config as config -from pytchat.paramgen import arcparam -from pytchat.parser.live import Parser - - -def test_arcparam_0(mocker): - param = arcparam.getparam("01234567890", -1) - assert param == "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA" - - -def test_arcparam_1(mocker): - param = arcparam.getparam("01234567890", seektime=100000) - assert param == "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D" - -def test_arcparam_3(mocker): - param = arcparam.getparam("01234567890") - assert param == "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA" diff --git a/tests/test_livechat_2.py b/tests/test_livechat_2.py index 657c546..eba1236 100644 --- a/tests/test_livechat_2.py +++ b/tests/test_livechat_2.py @@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock): assert True -def test_async_replay_stream(httpx_mock: HTTPXMock): - add_response_file(httpx_mock, 'tests/testdata/finished_live.json') - add_response_file(httpx_mock, 'tests/testdata/chatreplay.json') - - async def test_loop(): - chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor()) - chats = await chat.get() - rawdata = chats[0]["chatdata"] - # assert fetching replaychat data - assert list(rawdata[0]["addChatItemAction"]["item"].keys())[ - 0] == "liveChatTextMessageRenderer" - assert list(rawdata[14]["addChatItemAction"]["item"].keys())[ - 0] == "liveChatPaidMessageRenderer" - - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(test_loop()) - except CancelledError: - assert True - - -def test_async_force_replay(httpx_mock: HTTPXMock): - add_response_file(httpx_mock, 'tests/testdata/test_stream.json') - add_response_file(httpx_mock, 'tests/testdata/chatreplay.json') - - async def test_loop(): - chat = LiveChatAsync( - video_id='__test_id__', processor=DummyProcessor(), force_replay=True) - chats = await chat.get() - rawdata = chats[0]["chatdata"] - # assert fetching replaychat data - assert list(rawdata[14]["addChatItemAction"]["item"].keys())[ - 0] == "liveChatPaidMessageRenderer" - # assert not mix livechat data - assert list(rawdata[2]["addChatItemAction"]["item"].keys())[ - 0] != "liveChatPlaceholderItemRenderer" - - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(test_loop()) - except CancelledError: - assert True def test_multithread_live_stream(httpx_mock: HTTPXMock):