Compare commits

..

17 Commits

Author SHA1 Message Date
taizan-hokouto
808e599be6 Merge branch 'release/v0.5.1' 2021-01-09 22:14:55 +09:00
taizan-hokouto
5cb6f7f123 Increment version 2021-01-09 22:14:30 +09:00
taizan-hokouto
05de644d77 Merge branch 'hotfix/fix' 2021-01-09 22:13:15 +09:00
taizan-hokouto
a2f1c658f0 Merge branch 'master' into develop 2021-01-09 22:13:15 +09:00
taizan-hokouto
b908855566 Delete unnecessary line 2021-01-09 22:12:33 +09:00
taizan-hokouto
bf68859f38 Merge branch 'hotfix/fix' 2021-01-09 22:10:28 +09:00
taizan-hokouto
8d93bfcb95 Merge branch 'master' into develop 2021-01-09 22:10:28 +09:00
taizan-hokouto
78fbe97b66 Fix process of fetching archived chat 2021-01-09 22:09:31 +09:00
taizan-hokouto
b7f2967a4f Merge branch 'release/v0.5.0' 2020-12-13 22:29:25 +09:00
taizan-hokouto
166a256c1c Merge tag 'v0.5.0' into develop
v0.5.0
2020-12-13 22:29:25 +09:00
taizan-hokouto
0a8ff3abdc Increment version 2020-12-13 22:28:39 +09:00
taizan-hokouto
9b38a5428d Add python version 2020-12-13 22:08:10 +09:00
taizan-hokuto
9311bf1993 Merge pull request #26 from zecktos/patch-1
Fix for python3.9
2020-12-13 22:04:18 +09:00
zecktos
ee839da7c9 Fix for python3.9
'encoding' is deprecated and removed in Python 3.9 
could fix this https://github.com/taizan-hokuto/pytchat/issues/24
2020-12-13 13:39:58 +01:00
taizan-hokouto
2ae77b3850 Merge branch 'hotfix/readme' 2020-12-13 14:22:05 +09:00
taizan-hokouto
afd7cea635 Merge branch 'master' into develop 2020-12-13 14:22:05 +09:00
taizan-hokouto
9018ff9ee4 Update README 2020-12-13 14:21:42 +09:00
11 changed files with 49 additions and 106 deletions

View File

@@ -9,7 +9,7 @@ jobs:
strategy: strategy:
max-parallel: 4 max-parallel: 4
matrix: matrix:
python-version: [3.7, 3.8] python-version: [3.7, 3.8, 3.9]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2

View File

@@ -188,19 +188,3 @@ Structure of author object.
[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE) [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)
## Contributes
Great thanks:
Most of source code of CLI refer to:
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
Progress bar in CLI is based on:
[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3)
## Author
[taizan-hokuto](https://github.com/taizan-hokuto)
[twitter:@taizan205](https://twitter.com/taizan205)

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto' __copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.4.9' __version__ = '0.5.1'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -14,7 +14,6 @@ from .. import util
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
class PytchatCore: class PytchatCore:
''' '''
@@ -89,7 +88,7 @@ class PytchatCore:
"""Fetch first continuation parameter, """Fetch first continuation parameter,
create and start _listen loop. create and start _listen loop.
""" """
self.continuation = liveparam.getparam(self._video_id, 3) self.continuation = liveparam.getparam(self._video_id, past_sec=3)
def _get_chat_component(self): def _get_chat_component(self):
@@ -143,8 +142,8 @@ class PytchatCore:
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)) livechat_json = self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)[0]) self._parser.get_contents(livechat_json)[0])
if reload_continuation: if reload_continuation:
@@ -168,7 +167,7 @@ class PytchatCore:
with httpx.Client(http2=True) as client: with httpx.Client(http2=True) as client:
try: try:
response = client.post(self._fetch_url, json=param) response = client.post(self._fetch_url, json=param)
livechat_json = json.loads(response.text, encoding='utf-8') livechat_json = json.loads(response.text)
break break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e: except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e err = e

View File

@@ -81,7 +81,7 @@ class LiveChatAsync:
direct_mode=False, direct_mode=False,
force_replay=False, force_replay=False,
topchat_only=False, topchat_only=False,
logger=config.logger(__name__), logger=config.logger(__name__)
): ):
self._video_id = util.extract_video_id(video_id) self._video_id = util.extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
@@ -223,8 +223,9 @@ class LiveChatAsync:
'''Try to fetch archive chat data.''' '''Try to fetch archive chat data.'''
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
channelid = await util.get_channelid_async(client, self._video_id)
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, channelid)
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000)) continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(

View File

@@ -208,7 +208,7 @@ class LiveChat:
------- -------
'continuationContents' which includes metadata & chat data. 'continuationContents' which includes metadata & chat data.
''' '''
livechat_json = self._get_livechat_json(continuation, client, headers) livechat_json = self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json) contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat: if self._dat == '' and dat:
self._dat = dat self._dat = dat
@@ -218,7 +218,7 @@ class LiveChat:
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = config._smr self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json( livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000)) continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
@@ -242,8 +242,8 @@ class LiveChat:
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms) param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
try: try:
resp = client.post(self._fetch_url, json=param) response = client.post(self._fetch_url, json=param)
livechat_json = resp.json() livechat_json = response.json()
break break
except (json.JSONDecodeError, httpx.HTTPError): except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2) time.sleep(2)

View File

@@ -3,8 +3,7 @@ from base64 import urlsafe_b64encode as b64enc
from urllib.parse import quote from urllib.parse import quote
def _header(video_id) -> str: def _header(video_id, channel_id) -> str:
channel_id = '_' * 24
S1_3 = enc.rs(1, video_id) S1_3 = enc.rs(1, video_id)
S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id) S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id)
S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5) S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5)
@@ -13,31 +12,26 @@ def _header(video_id) -> str:
return b64enc(header_replay) return b64enc(header_replay)
def _build(video_id, seektime, topchat_only) -> str: def _build(video_id, seektime, topchat_only, channel_id) -> str:
chattype = 4 if topchat_only else 1 chattype = 4 if topchat_only else 1
fetch_before_start = 3
timestamp = 1000
if seektime < 0: if seektime < 0:
fetch_before_start = 4 seektime = 0
elif seektime == 0: timestamp = int(seektime * 1000000)
timestamp = 1000 header = enc.rs(3, _header(video_id, channel_id))
else:
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id))
timestamp = enc.nm(5, timestamp) timestamp = enc.nm(5, timestamp)
s6 = enc.nm(6, 0) s6 = enc.nm(6, 0)
s7 = enc.nm(7, 0) s7 = enc.nm(7, 0)
s8 = enc.nm(8, 0) s8 = enc.nm(8, 0)
s9 = enc.nm(9, fetch_before_start) s9 = enc.nm(9, 4)
s10 = enc.rs(10, enc.nm(4, 0)) s10 = enc.rs(10, enc.nm(4, 0))
chattype = enc.rs(14, enc.nm(1, chattype)) chattype = enc.rs(14, enc.nm(1, 4))
s15 = enc.nm(15, 0) s15 = enc.nm(15, 0)
entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15)) entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15))
continuation = enc.rs(156074452, entity) continuation = enc.rs(156074452, entity)
return quote(b64enc(continuation).decode()) return quote(b64enc(continuation).decode())
def getparam(video_id, seektime=-1, topchat_only=False) -> str: def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str:
''' '''
Parameter Parameter
--------- ---------
@@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str:
topchat_only : bool topchat_only : bool
if True, fetch only 'top chat' if True, fetch only 'top chat'
''' '''
return _build(video_id, seektime, topchat_only) return _build(video_id, seektime, topchat_only, channel_id)

View File

@@ -28,7 +28,7 @@ class Parser:
def get_contents(self, jsn): def get_contents(self, jsn):
if jsn is None: if jsn is None:
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.')) self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn.get("error") or jsn.get("responseContext", {}).get("errors"): if jsn.get("responseContext", {}).get("errors"):
raise exceptions.ResponseContextError( raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.') 'The video_id would be wrong, or video is deleted or private.')
contents = jsn.get('continuationContents') contents = jsn.get('continuationContents')

View File

@@ -3,6 +3,7 @@ import httpx
import json import json
import os import os
import re import re
from urllib.parse import quote
from .. import config from .. import config
from .. exceptions import InvalidVideoIdException from .. exceptions import InvalidVideoIdException
@@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)") PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")
YT_VIDEO_ID_LENGTH = 11 YT_VIDEO_ID_LENGTH = 11
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00")) CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
@@ -92,3 +95,26 @@ def extract_video_id(url_or_id: str) -> str:
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH: if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}") raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret return ret
def get_channelid(client, video_id):
    """Return the channel id scraped from the embed page of ``video_id``.

    Parameters
    ----------
    client :
        HTTP client object; only ``client.get(url, headers=...)`` is used,
        and the response must expose ``.text``.
    video_id : str
        Video id; it is URL-quoted before being placed in the embed URL.

    Returns
    -------
    str
        The 24-character value captured by ``PATTERN_CHANNEL``.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no ``channelId`` entry.
    """
    resp = client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError handling is needed.
    return match.group(1)
async def get_channelid_async(client, video_id):
    """Asynchronously return the channel id scraped from the embed page.

    Parameters
    ----------
    client :
        Asynchronous HTTP client object; ``client.get(url, headers=...)`` is
        awaited, and the response must expose ``.text``.
    video_id : str
        Video id; it is URL-quoted before being placed in the embed URL.

    Returns
    -------
    str
        The 24-character value captured by ``PATTERN_CHANNEL``.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no ``channelId`` entry.
    """
    resp = await client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines exactly one capture group, so group(1) always
    # exists once a match was found; no IndexError handling is needed.
    return match.group(1)

View File

@@ -1,19 +0,0 @@
import json
import httpx
import pytchat.config as config
from pytchat.paramgen import arcparam
from pytchat.parser.live import Parser
def test_arcparam_0(mocker):
    """A seektime of -1 must produce the known continuation parameter."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890", -1) == expected
def test_arcparam_1(mocker):
    """A seektime of 100000 must produce the known continuation parameter."""
    expected = "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D"
    assert arcparam.getparam("01234567890", seektime=100000) == expected
def test_arcparam_3(mocker):
    """Omitting seektime must produce the known continuation parameter."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890") == expected

View File

@@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock):
assert True assert True
def test_async_replay_stream(httpx_mock: HTTPXMock):
    """Fetch chat for a finished live stream and check replay chat items are returned."""
    add_response_file(httpx_mock, 'tests/testdata/finished_live.json')
    add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')

    async def _fetch_and_check():
        livechat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
        fetched = await livechat.get()
        actions = fetched[0]["chatdata"]
        # The first and fifteenth actions must carry replay-chat renderers.
        first_item = actions[0]["addChatItemAction"]["item"]
        assert list(first_item.keys())[0] == "liveChatTextMessageRenderer"
        fifteenth_item = actions[14]["addChatItemAction"]["item"]
        assert list(fifteenth_item.keys())[0] == "liveChatPaidMessageRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(_fetch_and_check())
    except CancelledError:
        assert True
def test_async_force_replay(httpx_mock: HTTPXMock):
    """With force_replay=True, replay chat must be returned without live chat items mixed in."""
    add_response_file(httpx_mock, 'tests/testdata/test_stream.json')
    add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')

    async def _fetch_and_check():
        livechat = LiveChatAsync(
            video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
        fetched = await livechat.get()
        actions = fetched[0]["chatdata"]
        # The fifteenth action must carry a replay-chat renderer.
        fifteenth_item = actions[14]["addChatItemAction"]["item"]
        assert list(fifteenth_item.keys())[0] == "liveChatPaidMessageRenderer"
        # Live-chat placeholder items must not be mixed into the replay data.
        third_item = actions[2]["addChatItemAction"]["item"]
        assert list(third_item.keys())[0] != "liveChatPlaceholderItemRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(_fetch_and_check())
    except CancelledError:
        assert True
def test_multithread_live_stream(httpx_mock: HTTPXMock): def test_multithread_live_stream(httpx_mock: HTTPXMock):