Compare commits

..

17 Commits

Author SHA1 Message Date
taizan-hokouto
808e599be6 Merge branch 'release/v0.5.1' 2021-01-09 22:14:55 +09:00
taizan-hokouto
5cb6f7f123 Increment version 2021-01-09 22:14:30 +09:00
taizan-hokouto
05de644d77 Merge branch 'hotfix/fix' 2021-01-09 22:13:15 +09:00
taizan-hokouto
a2f1c658f0 Merge branch 'master' into develop 2021-01-09 22:13:15 +09:00
taizan-hokouto
b908855566 Delete unnecessary line 2021-01-09 22:12:33 +09:00
taizan-hokouto
bf68859f38 Merge branch 'hotfix/fix' 2021-01-09 22:10:28 +09:00
taizan-hokouto
8d93bfcb95 Merge branch 'master' into develop 2021-01-09 22:10:28 +09:00
taizan-hokouto
78fbe97b66 Fix process of fetching archived chat 2021-01-09 22:09:31 +09:00
taizan-hokouto
b7f2967a4f Merge branch 'release/v0.5.0' 2020-12-13 22:29:25 +09:00
taizan-hokouto
166a256c1c Merge tag 'v0.5.0' into develop
v0.5.0
2020-12-13 22:29:25 +09:00
taizan-hokouto
0a8ff3abdc Increment version 2020-12-13 22:28:39 +09:00
taizan-hokouto
9b38a5428d Add python version 2020-12-13 22:08:10 +09:00
taizan-hokuto
9311bf1993 Merge pull request #26 from zecktos/patch-1
Fix for python3.9
2020-12-13 22:04:18 +09:00
zecktos
ee839da7c9 Fix for python3.9
'encoding' is deprecated and removed in Python 3.9 
could fix this https://github.com/taizan-hokuto/pytchat/issues/24
2020-12-13 13:39:58 +01:00
taizan-hokouto
2ae77b3850 Merge branch 'hotfix/readme' 2020-12-13 14:22:05 +09:00
taizan-hokouto
afd7cea635 Merge branch 'master' into develop 2020-12-13 14:22:05 +09:00
taizan-hokouto
9018ff9ee4 Update README 2020-12-13 14:21:42 +09:00
11 changed files with 49 additions and 106 deletions

View File

@@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.7, 3.8]
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2

View File

@@ -188,19 +188,3 @@ Structure of author object.
[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)
## Contributes
Great thanks:
Most of source code of CLI refer to:
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
Progress bar in CLI is based on:
[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3)
## Author
[taizan-hokuto](https://github.com/taizan-hokuto)
[twitter:@taizan205](https://twitter.com/taizan205)

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.4.9'
__version__ = '0.5.1'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -14,7 +14,6 @@ from .. import util
headers = config.headers
MAX_RETRY = 10
class PytchatCore:
'''
@@ -89,7 +88,7 @@ class PytchatCore:
"""Fetch first continuation parameter,
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, 3)
self.continuation = liveparam.getparam(self._video_id, past_sec=3)
def _get_chat_component(self):
@@ -143,8 +142,8 @@ class PytchatCore:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)[0])
if reload_continuation:
@@ -168,7 +167,7 @@ class PytchatCore:
with httpx.Client(http2=True) as client:
try:
response = client.post(self._fetch_url, json=param)
livechat_json = json.loads(response.text, encoding='utf-8')
livechat_json = json.loads(response.text)
break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e

View File

@@ -81,7 +81,7 @@ class LiveChatAsync:
direct_mode=False,
force_replay=False,
topchat_only=False,
logger=config.logger(__name__),
logger=config.logger(__name__)
):
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
@@ -223,8 +223,9 @@ class LiveChatAsync:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = config._smr
channelid = await util.get_channelid_async(client, self._video_id)
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, channelid)
livechat_json = (await self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(

View File

@@ -208,7 +208,7 @@ class LiveChat:
-------
'continuationContents' which includes metadata & chat data.
'''
livechat_json = self._get_livechat_json(continuation, client, headers)
livechat_json = self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
@@ -218,7 +218,7 @@ class LiveChat:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
@@ -242,8 +242,8 @@ class LiveChat:
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
try:
resp = client.post(self._fetch_url, json=param)
livechat_json = resp.json()
response = client.post(self._fetch_url, json=param)
livechat_json = response.json()
break
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)

View File

@@ -3,8 +3,7 @@ from base64 import urlsafe_b64encode as b64enc
from urllib.parse import quote
def _header(video_id) -> str:
channel_id = '_' * 24
def _header(video_id, channel_id) -> str:
S1_3 = enc.rs(1, video_id)
S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id)
S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5)
@@ -13,31 +12,26 @@ def _header(video_id) -> str:
return b64enc(header_replay)
def _build(video_id, seektime, topchat_only) -> str:
def _build(video_id, seektime, topchat_only, channel_id) -> str:
chattype = 4 if topchat_only else 1
fetch_before_start = 3
timestamp = 1000
if seektime < 0:
fetch_before_start = 4
elif seektime == 0:
timestamp = 1000
else:
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id))
seektime = 0
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id, channel_id))
timestamp = enc.nm(5, timestamp)
s6 = enc.nm(6, 0)
s7 = enc.nm(7, 0)
s8 = enc.nm(8, 0)
s9 = enc.nm(9, fetch_before_start)
s9 = enc.nm(9, 4)
s10 = enc.rs(10, enc.nm(4, 0))
chattype = enc.rs(14, enc.nm(1, chattype))
chattype = enc.rs(14, enc.nm(1, 4))
s15 = enc.nm(15, 0)
entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15))
continuation = enc.rs(156074452, entity)
return quote(b64enc(continuation).decode())
def getparam(video_id, seektime=-1, topchat_only=False) -> str:
def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str:
'''
Parameter
---------
@@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str:
topchat_only : bool
if True, fetch only 'top chat'
'''
return _build(video_id, seektime, topchat_only)
return _build(video_id, seektime, topchat_only, channel_id)

View File

@@ -28,7 +28,7 @@ class Parser:
def get_contents(self, jsn):
if jsn is None:
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
if jsn.get("responseContext", {}).get("errors"):
raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
contents = jsn.get('continuationContents')

View File

@@ -3,6 +3,7 @@ import httpx
import json
import os
import re
from urllib.parse import quote
from .. import config
from .. exceptions import InvalidVideoIdException
@@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")
YT_VIDEO_ID_LENGTH = 11
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
@@ -92,3 +95,26 @@ def extract_video_id(url_or_id: str) -> str:
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret
def get_channelid(client, video_id):
    """Return the channel id of the channel that owns ``video_id``.

    Requests the YouTube embed page for the video through ``client`` and
    extracts the 24-character channel id captured by ``PATTERN_CHANNEL``.

    Parameters
    ----------
    client :
        Synchronous HTTP client exposing ``get(url, headers=...)`` whose
        response has a ``text`` attribute (e.g. ``httpx.Client``).
    video_id : str
        Video id; it is percent-encoded before being placed in the URL.

    Returns
    -------
    str
        The channel id found in the embed page.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no channel id.
    """
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines capture group 1, so group(1) cannot raise
    # IndexError on a successful match; no extra guard is needed here.
    return match.group(1)
async def get_channelid_async(client, video_id):
    """Asynchronously return the channel id of the channel owning ``video_id``.

    Awaits a request for the YouTube embed page of the video through
    ``client`` and extracts the 24-character channel id captured by
    ``PATTERN_CHANNEL``.

    Parameters
    ----------
    client :
        Asynchronous HTTP client exposing an awaitable
        ``get(url, headers=...)`` whose response has a ``text`` attribute
        (e.g. ``httpx.AsyncClient``).
    video_id : str
        Video id; it is percent-encoded before being placed in the URL.

    Returns
    -------
    str
        The channel id found in the embed page.

    Raises
    ------
    InvalidVideoIdException
        If the embed page contains no channel id.
    """
    url = "https://www.youtube.com/embed/{}".format(quote(video_id))
    resp = await client.get(url, headers=config.headers)
    match = PATTERN_CHANNEL.search(resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_CHANNEL defines capture group 1, so group(1) cannot raise
    # IndexError on a successful match; no extra guard is needed here.
    return match.group(1)

View File

@@ -1,19 +0,0 @@
import json
import httpx
import pytchat.config as config
from pytchat.paramgen import arcparam
from pytchat.parser.live import Parser
def test_arcparam_0(mocker):
    """Pin the continuation token built for video "01234567890" with seektime -1."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890", -1) == expected
def test_arcparam_1(mocker):
    """Pin the continuation token built for video "01234567890" with seektime=100000."""
    expected = "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D"
    assert arcparam.getparam("01234567890", seektime=100000) == expected
def test_arcparam_3(mocker):
    """Pin the continuation token built for video "01234567890" with default arguments."""
    expected = "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
    assert arcparam.getparam("01234567890") == expected

View File

@@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock):
assert True
def test_async_replay_stream(httpx_mock: HTTPXMock):
    """Check that LiveChatAsync returns replay chat data from the queued fixtures.

    Queues the ``finished_live.json`` and ``chatreplay.json`` responses, runs
    one ``get()`` on the event loop, and checks the renderer type of two
    chat items in the first returned chunk.
    """
    for fixture_path in ('tests/testdata/finished_live.json',
                         'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture_path)

    async def test_loop():
        livechat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
        fetched = await livechat.get()
        actions = fetched[0]["chatdata"]

        def renderer_at(index):
            # First key of the chat item names its renderer type.
            return list(actions[index]["addChatItemAction"]["item"].keys())[0]

        # assert fetching replaychat data
        assert renderer_at(0) == "liveChatTextMessageRenderer"
        assert renderer_at(14) == "liveChatPaidMessageRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(test_loop())
    except CancelledError:
        assert True
def test_async_force_replay(httpx_mock: HTTPXMock):
    """Check that ``force_replay=True`` yields replay chat data only.

    Queues the ``test_stream.json`` and ``chatreplay.json`` responses, runs
    one ``get()`` on the event loop, and checks that a replay item is
    present and that no live-chat placeholder item is mixed in.
    """
    for fixture_path in ('tests/testdata/test_stream.json',
                         'tests/testdata/chatreplay.json'):
        add_response_file(httpx_mock, fixture_path)

    async def test_loop():
        livechat = LiveChatAsync(
            video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
        fetched = await livechat.get()
        actions = fetched[0]["chatdata"]

        def renderer_at(index):
            # First key of the chat item names its renderer type.
            return list(actions[index]["addChatItemAction"]["item"].keys())[0]

        # assert fetching replaychat data
        assert renderer_at(14) == "liveChatPaidMessageRenderer"
        # assert not mix livechat data
        assert renderer_at(2) != "liveChatPlaceholderItemRenderer"

    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(test_loop())
    except CancelledError:
        assert True
def test_multithread_live_stream(httpx_mock: HTTPXMock):