Compare commits

..

23 Commits

Author SHA1 Message Date
taizan-hokouto
b3ebe3879d Merge branch 'release/v0.5.2' 2021-01-17 22:41:20 +09:00
taizan-hokouto
da79895e55 Increment version 2021-01-17 22:40:15 +09:00
taizan-hokouto
aaa7421fdf Add replay_continuation parameter 2021-01-17 22:38:04 +09:00
taizan-hokuto
b9f213f047 Merge pull request #32 from miyuk/develop-add-replay-continuation
Add replay_continuation parameter
2021-01-15 00:11:04 +09:00
miyuk
fee070b299 Add replay_continuation parameter 2021-01-14 20:06:32 +09:00
taizan-hokouto
275e28b635 Merge tag 'v0.5.1' into develop
v0.5.1
2021-01-09 22:14:56 +09:00
taizan-hokouto
808e599be6 Merge branch 'release/v0.5.1' 2021-01-09 22:14:55 +09:00
taizan-hokouto
5cb6f7f123 Increment version 2021-01-09 22:14:30 +09:00
taizan-hokouto
a2f1c658f0 Merge branch 'master' into develop 2021-01-09 22:13:15 +09:00
taizan-hokouto
05de644d77 Merge branch 'hotfix/fix' 2021-01-09 22:13:15 +09:00
taizan-hokouto
b908855566 Delete unnecessary line 2021-01-09 22:12:33 +09:00
taizan-hokouto
8d93bfcb95 Merge branch 'master' into develop 2021-01-09 22:10:28 +09:00
taizan-hokouto
bf68859f38 Merge branch 'hotfix/fix' 2021-01-09 22:10:28 +09:00
taizan-hokouto
78fbe97b66 Fix process of fetching archived chat 2021-01-09 22:09:31 +09:00
taizan-hokouto
166a256c1c Merge tag 'v0.5.0' into develop
v0.5.0
2020-12-13 22:29:25 +09:00
taizan-hokouto
b7f2967a4f Merge branch 'release/v0.5.0' 2020-12-13 22:29:25 +09:00
taizan-hokouto
0a8ff3abdc Increment version 2020-12-13 22:28:39 +09:00
taizan-hokouto
9b38a5428d Add python version 2020-12-13 22:08:10 +09:00
taizan-hokuto
9311bf1993 Merge pull request #26 from zecktos/patch-1
Fix for python3.9
2020-12-13 22:04:18 +09:00
zecktos
ee839da7c9 Fix for python3.9
'encoding' is deprecated and removed in Python 3.9 
could fix this https://github.com/taizan-hokuto/pytchat/issues/24
2020-12-13 13:39:58 +01:00
taizan-hokouto
2ae77b3850 Merge branch 'hotfix/readme' 2020-12-13 14:22:05 +09:00
taizan-hokouto
afd7cea635 Merge branch 'master' into develop 2020-12-13 14:22:05 +09:00
taizan-hokouto
9018ff9ee4 Update README 2020-12-13 14:21:42 +09:00
11 changed files with 99 additions and 132 deletions

View File

@@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.7, 3.8]
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2

View File

@@ -188,19 +188,3 @@ Structure of author object.
[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)
## Contributes
Great thanks:
Most of source code of CLI refer to:
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
Progress bar in CLI is based on:
[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3)
## Author
[taizan-hokuto](https://github.com/taizan-hokuto)
[twitter:@taizan205](https://twitter.com/taizan205)

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.4.9'
__version__ = '0.5.2'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -14,7 +14,6 @@ from .. import util
headers = config.headers
MAX_RETRY = 10
class PytchatCore:
'''
@@ -45,6 +44,10 @@ class PytchatCore:
If True, when exceptions occur, the exception is held internally,
and can be raised by raise_for_status().
replay_continuation : str
If this parameter is not None, the processor will attempt to get chat data from continuation.
This parameter is only allowed in archived mode.
Attributes
---------
_is_alive : bool
@@ -59,6 +62,7 @@ class PytchatCore:
topchat_only=False,
hold_exception=True,
logger=config.logger(__name__),
replay_continuation=None
):
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
@@ -67,32 +71,33 @@ class PytchatCore:
else:
self.processor = processor
self._is_alive = True
self._is_replay = force_replay
self._is_replay = force_replay or (replay_continuation is not None)
self._hold_exception = hold_exception
self._exception_holder = None
self._parser = Parser(
is_replay=self._is_replay,
exception_holder=self._exception_holder
)
self._first_fetch = True
self._fetch_url = config._sml
self._first_fetch = replay_continuation is None
self._fetch_url = config._sml if replay_continuation is None else config._smr
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger
self.continuation = replay_continuation
if interruptable:
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
self._setup()
def _setup(self):
time.sleep(0.1) # sleep shortly to prohibit skipping fetching data
"""Fetch first continuation parameter,
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, 3)
if not self.continuation:
time.sleep(0.1) # sleep shortly to prohibit skipping fetching data
"""Fetch first continuation parameter,
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, past_sec=3)
def _get_chat_component(self):
''' Fetch chat data and store them into buffer,
get next continuaiton parameter and loop.
@@ -143,8 +148,8 @@ class PytchatCore:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000)
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)[0])
if reload_continuation:
@@ -168,7 +173,7 @@ class PytchatCore:
with httpx.Client(http2=True) as client:
try:
response = client.post(self._fetch_url, json=param)
livechat_json = json.loads(response.text, encoding='utf-8')
livechat_json = json.loads(response.text)
break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e
@@ -179,7 +184,7 @@ class PytchatCore:
f"Exceeded retry count. Last error: {str(err)}")
self._raise_exception(exceptions.RetryExceedMaxCount())
return livechat_json
def get(self):
if self.is_alive():
chat_component = self._get_chat_component()

View File

@@ -62,6 +62,10 @@ class LiveChatAsync:
topchat_only : bool
If True, get only top chat.
replay_continuation : str
If this parameter is not None, the processor will attempt to get chat data from continuation.
This parameter is only allowed in archived mode.
Attributes
---------
_is_alive : bool
@@ -82,6 +86,7 @@ class LiveChatAsync:
force_replay=False,
topchat_only=False,
logger=config.logger(__name__),
replay_continuation=None
):
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
@@ -95,17 +100,18 @@ class LiveChatAsync:
self._exception_handler = exception_handler
self._direct_mode = direct_mode
self._is_alive = True
self._is_replay = force_replay
self._is_replay = force_replay or (replay_continuation is not None)
self._parser = Parser(is_replay=self._is_replay)
self._pauser = Queue()
self._pauser.put_nowait(None)
self._first_fetch = True
self._fetch_url = config._sml
self._first_fetch = replay_continuation is None
self._fetch_url = config._sml if replay_continuation is None else config._smr
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger
self.exception = None
self.continuation = replay_continuation
LiveChatAsync._logger = logger
if exception_handler:
@@ -145,8 +151,9 @@ class LiveChatAsync:
"""Fetch first continuation parameter,
create and start _listen loop.
"""
initial_continuation = liveparam.getparam(self._video_id, 3)
await self._listen(initial_continuation)
if not self.continuation:
self.continuation = liveparam.getparam(self._video_id, 3)
await self._listen(self.continuation)
async def _listen(self, continuation):
''' Fetch chat data and store them into buffer,
@@ -163,6 +170,9 @@ class LiveChatAsync:
continuation = await self._check_pause(continuation)
contents = await self._get_contents(continuation, client, headers)
metadata, chatdata = self._parser.parse(contents)
continuation = metadata.get('continuation')
if continuation:
self.continuation = continuation
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self._video_id,
@@ -181,7 +191,6 @@ class LiveChatAsync:
await self._buffer.put(chat_component)
diff_time = timeout - (time.time() - time_mark)
await asyncio.sleep(diff_time)
continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
@@ -223,8 +232,9 @@ class LiveChatAsync:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = config._smr
channelid = await util.get_channelid_async(client, self._video_id)
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, channelid)
livechat_json = (await self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
@@ -241,7 +251,6 @@ class LiveChatAsync:
'''
Get json which includes chat data.
'''
# continuation = urllib.parse.quote(continuation)
livechat_json = None
if offset_ms < 0:
offset_ms = 0

View File

@@ -60,6 +60,10 @@ class LiveChat:
topchat_only : bool
If True, get only top chat.
replay_continuation : str
If this parameter is not None, the processor will attempt to get chat data from continuation.
This parameter is only allowed in archived mode.
Attributes
---------
_executor : ThreadPoolExecutor
@@ -81,7 +85,8 @@ class LiveChat:
direct_mode=False,
force_replay=False,
topchat_only=False,
logger=config.logger(__name__)
logger=config.logger(__name__),
replay_continuation=None
):
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
@@ -95,17 +100,19 @@ class LiveChat:
self._executor = ThreadPoolExecutor(max_workers=2)
self._direct_mode = direct_mode
self._is_alive = True
self._is_replay = force_replay
self._is_replay = force_replay or (replay_continuation is not None)
self._parser = Parser(is_replay=self._is_replay)
self._pauser = Queue()
self._pauser.put_nowait(None)
self._first_fetch = True
self._fetch_url = config._sml
self._first_fetch = replay_continuation is None
self._fetch_url = config._sml if replay_continuation is None else config._smr
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._event = Event()
self._logger = logger
self._event = Event()
self.continuation = replay_continuation
self.exception = None
if interruptable:
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
@@ -140,8 +147,9 @@ class LiveChat:
"""Fetch first continuation parameter,
create and start _listen loop.
"""
initial_continuation = liveparam.getparam(self._video_id, 3)
self._listen(initial_continuation)
if not self.continuation:
self.continuation = liveparam.getparam(self._video_id, 3)
self._listen(self.continuation)
def _listen(self, continuation):
''' Fetch chat data and store them into buffer,
@@ -158,6 +166,9 @@ class LiveChat:
continuation = self._check_pause(continuation)
contents = self._get_contents(continuation, client, headers)
metadata, chatdata = self._parser.parse(contents)
continuation = metadata.get('continuation')
if continuation:
self.continuation = continuation
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self._video_id,
@@ -176,7 +187,6 @@ class LiveChat:
self._buffer.put(chat_component)
diff_time = timeout - (time.time() - time_mark)
self._event.wait(diff_time if diff_time > 0 else 0)
continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
@@ -196,7 +206,8 @@ class LiveChat:
'''
self._pauser.put_nowait(None)
if not self._is_replay:
continuation = liveparam.getparam(self._video_id, 3)
continuation = liveparam.getparam(
self._video_id, 3, self._topchat_only)
return continuation
def _get_contents(self, continuation, client, headers):
@@ -208,7 +219,7 @@ class LiveChat:
-------
'continuationContents' which includes metadata & chat data.
'''
livechat_json = self._get_livechat_json(continuation, client, headers)
livechat_json = self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
@@ -218,7 +229,7 @@ class LiveChat:
self._parser.is_replay = True
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
self._video_id, self.seektime, self._topchat_only, util.get_channelid(client, self._video_id))
livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
@@ -235,15 +246,14 @@ class LiveChat:
'''
Get json which includes chat data.
'''
# continuation = urllib.parse.quote(continuation)
livechat_json = None
if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
try:
resp = client.post(self._fetch_url, json=param)
livechat_json = resp.json()
response = client.post(self._fetch_url, json=param)
livechat_json = response.json()
break
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)

View File

@@ -3,8 +3,7 @@ from base64 import urlsafe_b64encode as b64enc
from urllib.parse import quote
def _header(video_id) -> str:
channel_id = '_' * 24
def _header(video_id, channel_id) -> str:
S1_3 = enc.rs(1, video_id)
S1_5 = enc.rs(1, channel_id) + enc.rs(2, video_id)
S1 = enc.rs(3, S1_3) + enc.rs(5, S1_5)
@@ -13,31 +12,26 @@ def _header(video_id) -> str:
return b64enc(header_replay)
def _build(video_id, seektime, topchat_only) -> str:
def _build(video_id, seektime, topchat_only, channel_id) -> str:
chattype = 4 if topchat_only else 1
fetch_before_start = 3
timestamp = 1000
if seektime < 0:
fetch_before_start = 4
elif seektime == 0:
timestamp = 1000
else:
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id))
seektime = 0
timestamp = int(seektime * 1000000)
header = enc.rs(3, _header(video_id, channel_id))
timestamp = enc.nm(5, timestamp)
s6 = enc.nm(6, 0)
s7 = enc.nm(7, 0)
s8 = enc.nm(8, 0)
s9 = enc.nm(9, fetch_before_start)
s9 = enc.nm(9, 4)
s10 = enc.rs(10, enc.nm(4, 0))
chattype = enc.rs(14, enc.nm(1, chattype))
chattype = enc.rs(14, enc.nm(1, 4))
s15 = enc.nm(15, 0)
entity = b''.join((header, timestamp, s6, s7, s8, s9, s10, chattype, s15))
continuation = enc.rs(156074452, entity)
return quote(b64enc(continuation).decode())
def getparam(video_id, seektime=-1, topchat_only=False) -> str:
def getparam(video_id, seektime=0, topchat_only=False, channel_id='') -> str:
'''
Parameter
---------
@@ -47,4 +41,4 @@ def getparam(video_id, seektime=-1, topchat_only=False) -> str:
topchat_only : bool
if True, fetch only 'top chat'
'''
return _build(video_id, seektime, topchat_only)
return _build(video_id, seektime, topchat_only, channel_id)

View File

@@ -28,7 +28,7 @@ class Parser:
def get_contents(self, jsn):
if jsn is None:
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
if jsn.get("responseContext", {}).get("errors"):
raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
contents = jsn.get('continuationContents')

View File

@@ -3,6 +3,7 @@ import httpx
import json
import os
import re
from urllib.parse import quote
from .. import config
from .. exceptions import InvalidVideoIdException
@@ -10,6 +11,8 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")
YT_VIDEO_ID_LENGTH = 11
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
@@ -92,3 +95,26 @@ def extract_video_id(url_or_id: str) -> str:
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret
def get_channelid(client, video_id):
resp = client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
match = re.search(PATTERN_CHANNEL, resp.text)
if match is None:
raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
try:
ret = match.group(1)
except IndexError:
raise InvalidVideoIdException(f"Invalid video id: {video_id}")
return ret
async def get_channelid_async(client, video_id):
resp = await client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
match = re.search(PATTERN_CHANNEL, resp.text)
if match is None:
raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
try:
ret = match.group(1)
except IndexError:
raise InvalidVideoIdException(f"Invalid video id: {video_id}")
return ret

View File

@@ -1,19 +0,0 @@
import json
import httpx
import pytchat.config as config
from pytchat.paramgen import arcparam
from pytchat.parser.live import Parser
def test_arcparam_0(mocker):
param = arcparam.getparam("01234567890", -1)
assert param == "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"
def test_arcparam_1(mocker):
param = arcparam.getparam("01234567890", seektime=100000)
assert param == "op2w0wSHARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKIDQ28P0AjAAOABAAEgDUgIgAHICCAF4AA%3D%3D"
def test_arcparam_3(mocker):
param = arcparam.getparam("01234567890")
assert param == "op2w0wSDARpsQ2pnYURRb0xNREV5TXpRMU5qYzRPVEFxSndvWVgxOWZYMTlmWDE5ZlgxOWZYMTlmWDE5ZlgxOWZYMTlmRWdzd01USXpORFUyTnpnNU1Cb1Q2cWpkdVFFTkNnc3dNVEl6TkRVMk56ZzVNQ0FCKOgHMAA4AEAASARSAiAAcgIIAXgA"

View File

@@ -46,48 +46,6 @@ def test_async_live_stream(httpx_mock: HTTPXMock):
assert True
def test_async_replay_stream(httpx_mock: HTTPXMock):
add_response_file(httpx_mock, 'tests/testdata/finished_live.json')
add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')
async def test_loop():
chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
chats = await chat.get()
rawdata = chats[0]["chatdata"]
# assert fetching replaychat data
assert list(rawdata[0]["addChatItemAction"]["item"].keys())[
0] == "liveChatTextMessageRenderer"
assert list(rawdata[14]["addChatItemAction"]["item"].keys())[
0] == "liveChatPaidMessageRenderer"
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(test_loop())
except CancelledError:
assert True
def test_async_force_replay(httpx_mock: HTTPXMock):
add_response_file(httpx_mock, 'tests/testdata/test_stream.json')
add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')
async def test_loop():
chat = LiveChatAsync(
video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
chats = await chat.get()
rawdata = chats[0]["chatdata"]
# assert fetching replaychat data
assert list(rawdata[14]["addChatItemAction"]["item"].keys())[
0] == "liveChatPaidMessageRenderer"
# assert not mix livechat data
assert list(rawdata[2]["addChatItemAction"]["item"].keys())[
0] != "liveChatPlaceholderItemRenderer"
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(test_loop())
except CancelledError:
assert True
def test_multithread_live_stream(httpx_mock: HTTPXMock):