Merge tag 'json_decode_error' into develop

v0.0.6.6
This commit is contained in:
taizan-hokuto
2020-03-14 09:43:37 +09:00
7 changed files with 67 additions and 25 deletions

View File

@@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat.
pytchat is a python library for fetching youtube live chat pytchat is a python library for fetching youtube live chat
without using youtube api, Selenium or BeautifulSoup. without using youtube api, Selenium or BeautifulSoup.
pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。 pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
Other features: Other features:
+ Customizable chat data processors including youtube api compatible one. + Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
+ Available on asyncio context. + Available on asyncio context.
+ Quick fetching of initial chat data by generating continuation params + Quick fetching of initial chat data by generating continuation params
instead of web scraping. instead of web scraping.

View File

@@ -2,7 +2,7 @@
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup. pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.0.6.5' __version__ = '0.0.6.6'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -22,9 +22,10 @@ def main():
# Arguments # Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
help='Video IDs separated by commas without space') help='Video IDs separated by commas without space.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/")', default='./') help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version') help='Settings version')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
@@ -43,11 +44,17 @@ def main():
f" video_id: {video_id}\n" f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n" f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}") f" title: {info.get_title()}")
path = Path(Arguments().output+video_id+'.html')
print(f"output path: {path.resolve()}")
Extractor(video_id, Extractor(video_id,
processor = HTMLArchiver(Arguments().output+video_id+'.html') processor = HTMLArchiver(Arguments().output+video_id+'.html'),
callback = _disp_progress
).extract() ).extract()
print("Extraction end.\n") print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContentsException) as e: except (InvalidVideoIdException, NoContentsException) as e:
print(e) print(e)
return return
parser.print_help() parser.print_help()
def _disp_progress(a,b):
print('.',end="",flush=True)

View File

@@ -1,46 +1,52 @@
class ChatParseException(Exception): class ChatParseException(Exception):
''' '''
チャットデータをパースするライブラリが投げる例外の基底クラス Base exception thrown by the parser
''' '''
pass pass
class NoYtinitialdataException(ChatParseException): class NoYtinitialdataException(ChatParseException):
''' '''
配信ページ内にチャットデータurlが見つからないときに投げる例外 Thrown when the video is not found.
''' '''
pass pass
class ResponseContextError(ChatParseException): class ResponseContextError(ChatParseException):
''' '''
配信ページでチャットデータ無効の時に投げる例外 Thrown when chat data is invalid.
''' '''
pass pass
class NoLivechatRendererException(ChatParseException): class NoLivechatRendererException(ChatParseException):
''' '''
チャットデータのJSON中にlivechatRendererがない時に投げる例外 Thrown when livechatRenderer is missing in JSON.
''' '''
pass pass
class NoContentsException(ChatParseException): class NoContentsException(ChatParseException):
''' '''
チャットデータのJSON中にContinuationContentsがない時に投げる例外 Thrown when ContinuationContents is missing in JSON.
''' '''
pass pass
class NoContinuationsException(ChatParseException): class NoContinuationsException(ChatParseException):
''' '''
チャットデータのContinuationContents中にcontinuationがない時に投げる例外 Thrown when continuation is missing in ContinuationContents.
''' '''
pass pass
class IllegalFunctionCall(Exception): class IllegalFunctionCall(Exception):
''' '''
set_callback()を実行済みにもかかわらず Thrown when get () is called even though
get()を呼び出した場合の例外 set_callback () has been executed.
''' '''
pass pass
class InvalidVideoIdException(Exception): class InvalidVideoIdException(Exception):
'''
Thrown when the video_id is not exist (VideoInfo).
'''
pass pass
class UnknownConnectionError(Exception):
pass

View File

@@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchat' fmt_headers = ['datetime','elapsed','authorName','message','superchat'
,'type','authorChannel'] ,'type','authorChannel']
HEADER_HTML = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
'''
class HTMLArchiver(ChatProcessor): class HTMLArchiver(ChatProcessor):
''' '''
HtmlArchiver saves chat data as HTML table format. HtmlArchiver saves chat data as HTML table format.
@@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor):
super().__init__() super().__init__()
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
with open(self.save_path, mode='a', encoding = 'utf-8') as f: with open(self.save_path, mode='a', encoding = 'utf-8') as f:
f.write(HEADER_HTML)
f.write('<table border="1" style="border-collapse: collapse">') f.write('<table border="1" style="border-collapse: collapse">')
f.writelines(self._parse_html_header(fmt_headers)) f.writelines(self._parse_html_header(fmt_headers))
self.processor = DefaultProcessor() self.processor = DefaultProcessor()

View File

@@ -7,12 +7,15 @@ from . worker import ExtractWorker
from . patch import Patch from . patch import Patch
from ... import config from ... import config
from ... paramgen import arcparam from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from json import JSONDecodeError
from urllib.parse import quote from urllib.parse import quote
headers = config.headers headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation=" "get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
def _split(start, end, count, min_interval_sec = 120): def _split(start, end, count, min_interval_sec = 120):
""" """
@@ -53,13 +56,22 @@ def ready_blocks(video_id, duration, div, callback):
tasks = [_create_block(session, video_id, seektime, callback) tasks = [_create_block(session, video_id, seektime, callback)
for seektime in _split(-1, duration, div)] for seektime in _split(-1, duration, div)]
return await asyncio.gather(*tasks) return await asyncio.gather(*tasks)
async def _create_block(session, video_id, seektime, callback): async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime = seektime) continuation = arcparam.getparam(video_id, seektime = seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
async with session.get(url, headers = headers) as resp: for _ in range(MAX_RETRY_COUNT):
text = await resp.text() try :
next_continuation, actions = parser.parse(json.loads(text)) async with session.get(url, headers = headers) as resp:
text = await resp.text()
next_continuation, actions = parser.parse(json.loads(text))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions: if actions:
first = parser.get_offset(actions[0]) first = parser.get_offset(actions[0])
last = parser.get_offset(actions[-1]) last = parser.get_offset(actions[-1])
@@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback):
first = first, first = first,
last = last last = last
) )
""" """
fetch initial blocks. fetch initial blocks.
""" """
@@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id):
async def _fetch(continuation,session) -> Patch: async def _fetch(continuation,session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
async with session.get(url,headers = config.headers) as resp: for _ in range(MAX_RETRY_COUNT):
chat_json = await resp.text() try:
continuation, actions = parser.parse(json.loads(chat_json)) async with session.get(url,headers = config.headers) as resp:
chat_json = await resp.text()
continuation, actions = parser.parse(json.loads(chat_json))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions: if actions:
last = parser.get_offset(actions[-1]) last = parser.get_offset(actions[-1])
first = parser.get_offset(actions[0]) first = parser.get_offset(actions[0])
@@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id):
callback(actions, last - first) callback(actions, last - first)
return Patch(actions, continuation, first, last) return Patch(actions, continuation, first, last)
return Patch(continuation = continuation) return Patch(continuation = continuation)
""" """
allocate workers and assign blocks. allocate workers and assign blocks.
""" """

View File

@@ -36,7 +36,7 @@ def test_process_0():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json")
} }
assert SuperchatCalculator().process([chat_component])=={'': 6800.0, '': 2.0} assert SuperchatCalculator().process([chat_component])=={'': 6800.0, '': 2.0}
@@ -47,7 +47,7 @@ def test_process_1():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json")
} }
assert SuperchatCalculator().process([chat_component])=={} assert SuperchatCalculator().process([chat_component])=={}
@@ -59,7 +59,7 @@ def test_process_2():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
} }
assert False assert False
SuperchatCalculator().process([chat_component]) SuperchatCalculator().process([chat_component])