Merge branch 'hotfix/json_decode_error'
This commit is contained in:
@@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat.
|
||||
pytchat is a python library for fetching youtube live chat
|
||||
without using youtube api, Selenium or BeautifulSoup.
|
||||
|
||||
pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。
|
||||
pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
|
||||
|
||||
Other features:
|
||||
+ Customizable chat data processors including youtube api compatible one.
|
||||
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
|
||||
+ Available on asyncio context.
|
||||
+ Quick fetching of initial chat data by generating continuation params
|
||||
instead of web scraping.
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
|
||||
"""
|
||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||
__version__ = '0.0.6.5'
|
||||
__version__ = '0.0.6.6'
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'taizan-hokuto'
|
||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||
|
||||
@@ -22,9 +22,10 @@ def main():
|
||||
# Arguments
|
||||
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
||||
help='Video IDs separated by commas without space')
|
||||
help='Video IDs separated by commas without space.\n'
|
||||
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||
help='Output directory (end with "/")', default='./')
|
||||
help='Output directory (end with "/"). default="./"', default='./')
|
||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||
help='Settings version')
|
||||
Arguments(parser.parse_args().__dict__)
|
||||
@@ -43,11 +44,17 @@ def main():
|
||||
f" video_id: {video_id}\n"
|
||||
f" channel: {info.get_channel_name()}\n"
|
||||
f" title: {info.get_title()}")
|
||||
path = Path(Arguments().output+video_id+'.html')
|
||||
print(f"output path: {path.resolve()}")
|
||||
Extractor(video_id,
|
||||
processor = HTMLArchiver(Arguments().output+video_id+'.html')
|
||||
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
|
||||
callback = _disp_progress
|
||||
).extract()
|
||||
print("Extraction end.\n")
|
||||
print("\nExtraction end.\n")
|
||||
except (InvalidVideoIdException, NoContentsException) as e:
|
||||
print(e)
|
||||
return
|
||||
parser.print_help()
|
||||
|
||||
def _disp_progress(a,b):
|
||||
print('.',end="",flush=True)
|
||||
|
||||
@@ -1,46 +1,52 @@
|
||||
class ChatParseException(Exception):
|
||||
'''
|
||||
チャットデータをパースするライブラリが投げる例外の基底クラス
|
||||
Base exception thrown by the parser
|
||||
'''
|
||||
pass
|
||||
|
||||
class NoYtinitialdataException(ChatParseException):
|
||||
'''
|
||||
配信ページ内にチャットデータurlが見つからないときに投げる例外
|
||||
Thrown when the video is not found.
|
||||
'''
|
||||
pass
|
||||
|
||||
class ResponseContextError(ChatParseException):
|
||||
'''
|
||||
配信ページでチャットデータ無効の時に投げる例外
|
||||
Thrown when chat data is invalid.
|
||||
'''
|
||||
pass
|
||||
|
||||
class NoLivechatRendererException(ChatParseException):
|
||||
'''
|
||||
チャットデータのJSON中にlivechatRendererがない時に投げる例外
|
||||
Thrown when livechatRenderer is missing in JSON.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
class NoContentsException(ChatParseException):
|
||||
'''
|
||||
チャットデータのJSON中にContinuationContentsがない時に投げる例外
|
||||
Thrown when ContinuationContents is missing in JSON.
|
||||
'''
|
||||
pass
|
||||
|
||||
class NoContinuationsException(ChatParseException):
|
||||
'''
|
||||
チャットデータのContinuationContents中にcontinuationがない時に投げる例外
|
||||
Thrown when continuation is missing in ContinuationContents.
|
||||
'''
|
||||
pass
|
||||
|
||||
class IllegalFunctionCall(Exception):
|
||||
'''
|
||||
set_callback()を実行済みにもかかわらず
|
||||
get()を呼び出した場合の例外
|
||||
Thrown when get () is called even though
|
||||
set_callback () has been executed.
|
||||
'''
|
||||
pass
|
||||
|
||||
class InvalidVideoIdException(Exception):
|
||||
'''
|
||||
Thrown when the video_id is not exist (VideoInfo).
|
||||
'''
|
||||
pass
|
||||
|
||||
class UnknownConnectionError(Exception):
|
||||
pass
|
||||
@@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
|
||||
,'type','authorChannel']
|
||||
|
||||
HEADER_HTML = '''
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||
'''
|
||||
|
||||
class HTMLArchiver(ChatProcessor):
|
||||
'''
|
||||
HtmlArchiver saves chat data as HTML table format.
|
||||
@@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor):
|
||||
super().__init__()
|
||||
self.save_path = self._checkpath(save_path)
|
||||
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
|
||||
f.write(HEADER_HTML)
|
||||
f.write('<table border="1" style="border-collapse: collapse">')
|
||||
f.writelines(self._parse_html_header(fmt_headers))
|
||||
self.processor = DefaultProcessor()
|
||||
|
||||
@@ -7,12 +7,15 @@ from . worker import ExtractWorker
|
||||
from . patch import Patch
|
||||
from ... import config
|
||||
from ... paramgen import arcparam
|
||||
from ... exceptions import UnknownConnectionError
|
||||
from concurrent.futures import CancelledError
|
||||
from json import JSONDecodeError
|
||||
from urllib.parse import quote
|
||||
|
||||
headers = config.headers
|
||||
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
||||
"get_live_chat_replay?continuation="
|
||||
MAX_RETRY_COUNT = 3
|
||||
|
||||
def _split(start, end, count, min_interval_sec = 120):
|
||||
"""
|
||||
@@ -53,13 +56,22 @@ def ready_blocks(video_id, duration, div, callback):
|
||||
tasks = [_create_block(session, video_id, seektime, callback)
|
||||
for seektime in _split(-1, duration, div)]
|
||||
return await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
async def _create_block(session, video_id, seektime, callback):
|
||||
continuation = arcparam.getparam(video_id, seektime = seektime)
|
||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||
async with session.get(url, headers = headers) as resp:
|
||||
text = await resp.text()
|
||||
next_continuation, actions = parser.parse(json.loads(text))
|
||||
for _ in range(MAX_RETRY_COUNT):
|
||||
try :
|
||||
async with session.get(url, headers = headers) as resp:
|
||||
text = await resp.text()
|
||||
next_continuation, actions = parser.parse(json.loads(text))
|
||||
break
|
||||
except JSONDecodeError:
|
||||
await asyncio.sleep(3)
|
||||
else:
|
||||
cancel()
|
||||
raise UnknownConnectionError("Abort: Unknown connection error.")
|
||||
|
||||
if actions:
|
||||
first = parser.get_offset(actions[0])
|
||||
last = parser.get_offset(actions[-1])
|
||||
@@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback):
|
||||
first = first,
|
||||
last = last
|
||||
)
|
||||
|
||||
"""
|
||||
fetch initial blocks.
|
||||
"""
|
||||
@@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id):
|
||||
|
||||
async def _fetch(continuation,session) -> Patch:
|
||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||
async with session.get(url,headers = config.headers) as resp:
|
||||
chat_json = await resp.text()
|
||||
continuation, actions = parser.parse(json.loads(chat_json))
|
||||
for _ in range(MAX_RETRY_COUNT):
|
||||
try:
|
||||
async with session.get(url,headers = config.headers) as resp:
|
||||
chat_json = await resp.text()
|
||||
continuation, actions = parser.parse(json.loads(chat_json))
|
||||
break
|
||||
except JSONDecodeError:
|
||||
await asyncio.sleep(3)
|
||||
else:
|
||||
cancel()
|
||||
raise UnknownConnectionError("Abort: Unknown connection error.")
|
||||
|
||||
if actions:
|
||||
last = parser.get_offset(actions[-1])
|
||||
first = parser.get_offset(actions[0])
|
||||
@@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id):
|
||||
callback(actions, last - first)
|
||||
return Patch(actions, continuation, first, last)
|
||||
return Patch(continuation = continuation)
|
||||
|
||||
"""
|
||||
allocate workers and assign blocks.
|
||||
"""
|
||||
|
||||
@@ -36,7 +36,7 @@ def test_process_0():
|
||||
chat_component = {
|
||||
'video_id':'',
|
||||
'timeout':10,
|
||||
'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json")
|
||||
'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json")
|
||||
}
|
||||
assert SuperchatCalculator().process([chat_component])=={'¥': 6800.0, '€': 2.0}
|
||||
|
||||
@@ -47,7 +47,7 @@ def test_process_1():
|
||||
chat_component = {
|
||||
'video_id':'',
|
||||
'timeout':10,
|
||||
'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json")
|
||||
'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json")
|
||||
}
|
||||
assert SuperchatCalculator().process([chat_component])=={}
|
||||
|
||||
@@ -59,7 +59,7 @@ def test_process_2():
|
||||
chat_component = {
|
||||
'video_id':'',
|
||||
'timeout':10,
|
||||
'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json")
|
||||
'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
|
||||
}
|
||||
assert False
|
||||
SuperchatCalculator().process([chat_component])
|
||||
|
||||
Reference in New Issue
Block a user