Compare commits

..

9 Commits

Author SHA1 Message Date
taizan-hokouto
808e599be6 Merge branch 'release/v0.5.1' 2021-01-09 22:14:55 +09:00
taizan-hokouto
5cb6f7f123 Increment version 2021-01-09 22:14:30 +09:00
taizan-hokouto
05de644d77 Merge branch 'hotfix/fix' 2021-01-09 22:13:15 +09:00
taizan-hokouto
a2f1c658f0 Merge branch 'master' into develop 2021-01-09 22:13:15 +09:00
taizan-hokouto
b908855566 Delete unnecessary line 2021-01-09 22:12:33 +09:00
taizan-hokouto
bf68859f38 Merge branch 'hotfix/fix' 2021-01-09 22:10:28 +09:00
taizan-hokouto
8d93bfcb95 Merge branch 'master' into develop 2021-01-09 22:10:28 +09:00
taizan-hokouto
78fbe97b66 Fix process of fetching archived chat 2021-01-09 22:09:31 +09:00
taizan-hokouto
166a256c1c Merge tag 'v0.5.0' into develop
v0.5.0
2020-12-13 22:29:25 +09:00
9 changed files with 47 additions and 88 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto' __copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.5.0' __version__ = '0.5.1'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -14,7 +14,6 @@ from .. import util
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
class PytchatCore: class PytchatCore:
''' '''
@@ -89,7 +88,7 @@ class PytchatCore:
"""Fetch first continuation parameter, """Fetch first continuation parameter,
create and start _listen loop. create and start _listen loop.
""" """
self.continuation = liveparam.getparam(self._video_id, 3) self.continuation = liveparam.getparam(self._video_id, past_sec=3)
def _get_chat_component(self): def _get_chat_component(self):
@@ -143,8 +142,8 @@ class PytchatCore:
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)) livechat_json = self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)[0]) self._parser.get_contents(livechat_json)[0])
if reload_continuation: if reload_continuation:

View File

@@ -81,7 +81,7 @@ class LiveChatAsync:
direct_mode=False, direct_mode=False,
force_replay=False, force_replay=False,
topchat_only=False, topchat_only=False,
logger=config.logger(__name__), logger=config.logger(__name__)
): ):
self._video_id = util.extract_video_id(video_id) self._video_id = util.extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
@@ -223,8 +223,9 @@ class LiveChatAsync:
'''Try to fetch archive chat data.''' '''Try to fetch archive chat data.'''
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
channelid = await util.get_channelid_async(client, self._video_id)
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, channelid)
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000)) continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(

View File

@@ -208,7 +208,7 @@ class LiveChat:
------- -------
'continuationContents' which includes metadata & chat data. 'continuationContents' which includes metadata & chat data.
''' '''
livechat_json = self._get_livechat_json(continuation, client, headers) livechat_json = self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json) contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat: if self._dat == '' and dat:
self._dat = dat self._dat = dat
@@ -218,7 +218,7 @@ class LiveChat:
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json( livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000)) continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
@@ -242,8 +242,8 @@ class LiveChat:
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms) param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
try: try:
resp = client.post(self._fetch_url, json=param) response = client.post(self._fetch_url, json=param)
livechat_json = resp.json() livechat_json = response.json()
break break
except (json.JSONDecodeError, httpx.HTTPError): except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2) time.sleep(2)

View File

@@ -3,8 +3,7 @@ from base64 import urlsafe_b64encode as b64enc
from urllib.parse import quote from urllib.parse import quote
def _header(video_id) -> str: def _header(video_id, channel_id) -> str:
channel_id = '_' * 24
S1_3 = enc.rs(1, video_id) S1_3 = enc.rs(1, video_id)
S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id) S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id)
S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5) S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5)
@@ -13,31 +12,26 @@ def _header(video_id) -> str:
return b64enc(header_replay) return b64enc(header_replay)
def _build(video_id, seektime, topchat_only) -> str: def _build(video_id, seektime, topchat_only, channel_id) -> str:
chattype = 4 if topchat_only else 1 chattype = 4 if topchat_only else 1
fetch_before_start = 3
timestamp = 1000
if seektime < 0: if seektime < 0:
fetch_before_start = 4 seektime = 0
elif seektime == 0: timestamp = int(seektime * 1000000)
timestamp = 1000 header = enc.rs(3, _header(video_id, channel_id))
else:
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id))
timestamp = enc.nm(5, timestamp) timestamp = enc.nm(5, timestamp)
s6 = enc.nm(6, 0) s6 = enc.nm(6, 0)
s7 = enc.nm(7, 0) s7 = enc.nm(7, 0)
s8 = enc.nm(8, 0) s8 = enc.nm(8, 0)
s9 = enc.nm(9, fetch_before_start) s9 = enc.nm(9, 4)
s10 = enc.rs(10, enc.nm(4, 0)) s10 = enc.rs(10, enc.nm(4, 0))
chattype = enc.rs(14, enc.nm(1, chattype)) chattype = enc.rs(14, enc.nm(1, 4))
s15 = enc.nm(15, 0) s15 = enc.nm(15, 0)
entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15)) entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15))
continuation = enc.rs(156074452, entity) continuation = enc.rs(156074452, entity)
return quote(b64enc(continuation).decode()) return quote(b64enc(continuation).decode())
def getparam(video_id, seektime=-1, topchat_only=False) -> str: def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str:
''' '''
Parameter Parameter
--------- ---------
@@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str:
topchat_only : bool topchat_only : bool
if True, fetch only 'top chat' if True, fetch only 'top chat'
''' '''
return _build(video_id, seektime, topchat_only) return _build(video_id, seektime, topchat_only, channel_id)

View File

@@ -28,7 +28,7 @@ class Parser:
def get_contents(self, jsn): def get_contents(self, jsn):
if jsn is None: if jsn is None:
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.')) self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn.get("error") or jsn.get("responseContext", {}).get("errors"): if jsn.get("responseContext", {}).get("errors"):
raise exceptions.ResponseContextError( raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.') 'The video_id would be wrong, or video is deleted or private.')
contents = jsn.get('continuationContents') contents = jsn.get('continuationContents')

View File

@@ -3,6 +3,7 @@ import httpx
import json import json
import os import os
import re import re
from urllib.parse import quote
from .. import config from .. import config
from .. exceptions import InvalidVideoIdException from .. exceptions import InvalidVideoIdException
@@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)") PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")
YT_VIDEO_ID_LENGTH = 11 YT_VIDEO_ID_LENGTH = 11
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00")) CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
@@ -92,3 +95,26 @@ def extract_video_id(url_or_id: str) -> str:
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH: if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret return ret
def get_channelid(client, video_id):
    """Return the channel id found on the embed page of ``video_id``.

    Parameters
    ----------
    client :
        Object exposing ``get(url, headers=...)`` whose result has a
        ``text`` attribute (presumably an ``httpx.Client`` -- confirm
        against callers).
    video_id : str
        Video id; it is percent-quoted before being placed in the URL.

    Returns
    -------
    str
        The 24-character value captured by ``PATTERN_CHANNEL``.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no text matching ``PATTERN_CHANNEL``.
    """
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError guard is needed here.
    return match.group(1)
async def get_channelid_async(client, video_id):
    """Asynchronously return the channel id found on the embed page of ``video_id``.

    Parameters
    ----------
    client :
        Object exposing an awaitable ``get(url, headers=...)`` whose result
        has a ``text`` attribute (presumably an ``httpx.AsyncClient`` --
        confirm against callers).
    video_id : str
        Video id; it is percent-quoted before being placed in the URL.

    Returns
    -------
    str
        The 24-character value captured by ``PATTERN_CHANNEL``.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no text matching ``PATTERN_CHANNEL``.
    """
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = await client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError guard is needed here.
    return match.group(1)

View File

@@ -1,19 +0,0 @@
import json
import httpx
import pytchat.config as config
from pytchat.paramgen import arcparam
from pytchat.parser.live import Parser
def test_arcparam_0(mocker):
    """Pin the continuation string produced for a negative seektime (-1)."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890", -1) == expected
def test_arcparam_1(mocker):
    """Pin the continuation string produced for an explicit seektime of 100000."""
    expected = "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D"
    assert arcparam.getparam("01234567890", seektime=100000) == expected
def test_arcparam_3(mocker):
    """Pin the continuation string produced when only the video id is given."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890") == expected

View File

@@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock):
assert True assert True
def test_async_replay_stream(httpx_mock: HTTPXMock):
    """Check the renderer types at two positions of the chat data LiveChatAsync returns.

    Two canned responses are registered in order: 'finished_live.json',
    then 'chatreplay.json'.
    """
    for fixture in ('tests/testdata/finished_live.json',
                    'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture)

    def renderer_of(action):
        # Name of the first key under addChatItemAction.item.
        return list(action["addChatItemAction"]["item"].keys())[0]

    async def fetch_and_check():
        livechat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
        batches = await livechat.get()
        actions = batches[0]["chatdata"]
        # assert fetching replaychat data
        assert renderer_of(actions[0]) == "liveChatTextMessageRenderer"
        assert renderer_of(actions[14]) == "liveChatPaidMessageRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(fetch_and_check())
    except CancelledError:
        # Cancellation is tolerated and does not fail the test.
        pass
def test_async_force_replay(httpx_mock: HTTPXMock):
    """Check the renderer types LiveChatAsync returns when force_replay=True.

    Two canned responses are registered in order: 'test_stream.json',
    then 'chatreplay.json'.
    """
    for fixture in ('tests/testdata/test_stream.json',
                    'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture)

    def renderer_of(action):
        # Name of the first key under addChatItemAction.item.
        return list(action["addChatItemAction"]["item"].keys())[0]

    async def fetch_and_check():
        livechat = LiveChatAsync(
            video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
        batches = await livechat.get()
        actions = batches[0]["chatdata"]
        # assert fetching replaychat data
        assert renderer_of(actions[14]) == "liveChatPaidMessageRenderer"
        # assert not mix livechat data
        assert renderer_of(actions[2]) != "liveChatPlaceholderItemRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(fetch_and_check())
    except CancelledError:
        # Cancellation is tolerated and does not fail the test.
        pass
def test_multithread_live_stream(httpx_mock: HTTPXMock): def test_multithread_live_stream(httpx_mock: HTTPXMock):