Merge tag 'json_decode_error' into develop
v0.0.6.6
This commit is contained in:
@@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat.
|
|||||||
pytchat is a python library for fetching youtube live chat
|
pytchat is a python library for fetching youtube live chat
|
||||||
without using youtube api, Selenium or BeautifulSoup.
|
without using youtube api, Selenium or BeautifulSoup.
|
||||||
|
|
||||||
pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。
|
pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
|
||||||
|
|
||||||
Other features:
|
Other features:
|
||||||
+ Customizable chat data processors including youtube api compatible one.
|
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
|
||||||
+ Available on asyncio context.
|
+ Available on asyncio context.
|
||||||
+ Quick fetching of initial chat data by generating continuation params
|
+ Quick fetching of initial chat data by generating continuation params
|
||||||
instead of web scraping.
|
instead of web scraping.
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
|
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.0.6.5'
|
__version__ = '0.0.6.6'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -22,9 +22,10 @@ def main():
|
|||||||
# Arguments
|
# Arguments
|
||||||
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
||||||
help='Video IDs separated by commas without space')
|
help='Video IDs separated by commas without space.\n'
|
||||||
|
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||||
help='Output directory (end with "/")', default='./')
|
help='Output directory (end with "/"). default="./"', default='./')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Settings version')
|
help='Settings version')
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
@@ -43,11 +44,17 @@ def main():
|
|||||||
f" video_id: {video_id}\n"
|
f" video_id: {video_id}\n"
|
||||||
f" channel: {info.get_channel_name()}\n"
|
f" channel: {info.get_channel_name()}\n"
|
||||||
f" title: {info.get_title()}")
|
f" title: {info.get_title()}")
|
||||||
|
path = Path(Arguments().output+video_id+'.html')
|
||||||
|
print(f"output path: {path.resolve()}")
|
||||||
Extractor(video_id,
|
Extractor(video_id,
|
||||||
processor = HTMLArchiver(Arguments().output+video_id+'.html')
|
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
|
||||||
|
callback = _disp_progress
|
||||||
).extract()
|
).extract()
|
||||||
print("Extraction end.\n")
|
print("\nExtraction end.\n")
|
||||||
except (InvalidVideoIdException, NoContentsException) as e:
|
except (InvalidVideoIdException, NoContentsException) as e:
|
||||||
print(e)
|
print(e)
|
||||||
return
|
return
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
|
|
||||||
|
def _disp_progress(a,b):
|
||||||
|
print('.',end="",flush=True)
|
||||||
|
|||||||
@@ -1,46 +1,52 @@
|
|||||||
class ChatParseException(Exception):
|
class ChatParseException(Exception):
|
||||||
'''
|
'''
|
||||||
チャットデータをパースするライブラリが投げる例外の基底クラス
|
Base exception thrown by the parser
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class NoYtinitialdataException(ChatParseException):
|
class NoYtinitialdataException(ChatParseException):
|
||||||
'''
|
'''
|
||||||
配信ページ内にチャットデータurlが見つからないときに投げる例外
|
Thrown when the video is not found.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class ResponseContextError(ChatParseException):
|
class ResponseContextError(ChatParseException):
|
||||||
'''
|
'''
|
||||||
配信ページでチャットデータ無効の時に投げる例外
|
Thrown when chat data is invalid.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class NoLivechatRendererException(ChatParseException):
|
class NoLivechatRendererException(ChatParseException):
|
||||||
'''
|
'''
|
||||||
チャットデータのJSON中にlivechatRendererがない時に投げる例外
|
Thrown when livechatRenderer is missing in JSON.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class NoContentsException(ChatParseException):
|
class NoContentsException(ChatParseException):
|
||||||
'''
|
'''
|
||||||
チャットデータのJSON中にContinuationContentsがない時に投げる例外
|
Thrown when ContinuationContents is missing in JSON.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class NoContinuationsException(ChatParseException):
|
class NoContinuationsException(ChatParseException):
|
||||||
'''
|
'''
|
||||||
チャットデータのContinuationContents中にcontinuationがない時に投げる例外
|
Thrown when continuation is missing in ContinuationContents.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class IllegalFunctionCall(Exception):
|
class IllegalFunctionCall(Exception):
|
||||||
'''
|
'''
|
||||||
set_callback()を実行済みにもかかわらず
|
Thrown when get() is called even though
|
||||||
get()を呼び出した場合の例外
|
set_callback() has been executed.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class InvalidVideoIdException(Exception):
|
class InvalidVideoIdException(Exception):
|
||||||
|
'''
|
||||||
|
Thrown when the video_id does not exist (VideoInfo).
|
||||||
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class UnknownConnectionError(Exception):
|
||||||
|
pass
|
||||||
@@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
|||||||
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
|
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
|
||||||
,'type','authorChannel']
|
,'type','authorChannel']
|
||||||
|
|
||||||
|
HEADER_HTML = '''
|
||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||||
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||||
|
'''
|
||||||
|
|
||||||
class HTMLArchiver(ChatProcessor):
|
class HTMLArchiver(ChatProcessor):
|
||||||
'''
|
'''
|
||||||
HtmlArchiver saves chat data as HTML table format.
|
HtmlArchiver saves chat data as HTML table format.
|
||||||
@@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.save_path = self._checkpath(save_path)
|
self.save_path = self._checkpath(save_path)
|
||||||
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
|
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
|
||||||
|
f.write(HEADER_HTML)
|
||||||
f.write('<table border="1" style="border-collapse: collapse">')
|
f.write('<table border="1" style="border-collapse: collapse">')
|
||||||
f.writelines(self._parse_html_header(fmt_headers))
|
f.writelines(self._parse_html_header(fmt_headers))
|
||||||
self.processor = DefaultProcessor()
|
self.processor = DefaultProcessor()
|
||||||
|
|||||||
@@ -7,12 +7,15 @@ from . worker import ExtractWorker
|
|||||||
from . patch import Patch
|
from . patch import Patch
|
||||||
from ... import config
|
from ... import config
|
||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
|
from ... exceptions import UnknownConnectionError
|
||||||
from concurrent.futures import CancelledError
|
from concurrent.futures import CancelledError
|
||||||
|
from json import JSONDecodeError
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
||||||
"get_live_chat_replay?continuation="
|
"get_live_chat_replay?continuation="
|
||||||
|
MAX_RETRY_COUNT = 3
|
||||||
|
|
||||||
def _split(start, end, count, min_interval_sec = 120):
|
def _split(start, end, count, min_interval_sec = 120):
|
||||||
"""
|
"""
|
||||||
@@ -53,13 +56,22 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
tasks = [_create_block(session, video_id, seektime, callback)
|
tasks = [_create_block(session, video_id, seektime, callback)
|
||||||
for seektime in _split(-1, duration, div)]
|
for seektime in _split(-1, duration, div)]
|
||||||
return await asyncio.gather(*tasks)
|
return await asyncio.gather(*tasks)
|
||||||
|
|
||||||
async def _create_block(session, video_id, seektime, callback):
|
async def _create_block(session, video_id, seektime, callback):
|
||||||
continuation = arcparam.getparam(video_id, seektime = seektime)
|
continuation = arcparam.getparam(video_id, seektime = seektime)
|
||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
async with session.get(url, headers = headers) as resp:
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
text = await resp.text()
|
try :
|
||||||
next_continuation, actions = parser.parse(json.loads(text))
|
async with session.get(url, headers = headers) as resp:
|
||||||
|
text = await resp.text()
|
||||||
|
next_continuation, actions = parser.parse(json.loads(text))
|
||||||
|
break
|
||||||
|
except JSONDecodeError:
|
||||||
|
await asyncio.sleep(3)
|
||||||
|
else:
|
||||||
|
cancel()
|
||||||
|
raise UnknownConnectionError("Abort: Unknown connection error.")
|
||||||
|
|
||||||
if actions:
|
if actions:
|
||||||
first = parser.get_offset(actions[0])
|
first = parser.get_offset(actions[0])
|
||||||
last = parser.get_offset(actions[-1])
|
last = parser.get_offset(actions[-1])
|
||||||
@@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
first = first,
|
first = first,
|
||||||
last = last
|
last = last
|
||||||
)
|
)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
fetch initial blocks.
|
fetch initial blocks.
|
||||||
"""
|
"""
|
||||||
@@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
|
|
||||||
async def _fetch(continuation,session) -> Patch:
|
async def _fetch(continuation,session) -> Patch:
|
||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
async with session.get(url,headers = config.headers) as resp:
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
chat_json = await resp.text()
|
try:
|
||||||
continuation, actions = parser.parse(json.loads(chat_json))
|
async with session.get(url,headers = config.headers) as resp:
|
||||||
|
chat_json = await resp.text()
|
||||||
|
continuation, actions = parser.parse(json.loads(chat_json))
|
||||||
|
break
|
||||||
|
except JSONDecodeError:
|
||||||
|
await asyncio.sleep(3)
|
||||||
|
else:
|
||||||
|
cancel()
|
||||||
|
raise UnknownConnectionError("Abort: Unknown connection error.")
|
||||||
|
|
||||||
if actions:
|
if actions:
|
||||||
last = parser.get_offset(actions[-1])
|
last = parser.get_offset(actions[-1])
|
||||||
first = parser.get_offset(actions[0])
|
first = parser.get_offset(actions[0])
|
||||||
@@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
callback(actions, last - first)
|
callback(actions, last - first)
|
||||||
return Patch(actions, continuation, first, last)
|
return Patch(actions, continuation, first, last)
|
||||||
return Patch(continuation = continuation)
|
return Patch(continuation = continuation)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
allocate workers and assign blocks.
|
allocate workers and assign blocks.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ def test_process_0():
|
|||||||
chat_component = {
|
chat_component = {
|
||||||
'video_id':'',
|
'video_id':'',
|
||||||
'timeout':10,
|
'timeout':10,
|
||||||
'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json")
|
'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json")
|
||||||
}
|
}
|
||||||
assert SuperchatCalculator().process([chat_component])=={'¥': 6800.0, '€': 2.0}
|
assert SuperchatCalculator().process([chat_component])=={'¥': 6800.0, '€': 2.0}
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ def test_process_1():
|
|||||||
chat_component = {
|
chat_component = {
|
||||||
'video_id':'',
|
'video_id':'',
|
||||||
'timeout':10,
|
'timeout':10,
|
||||||
'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json")
|
'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json")
|
||||||
}
|
}
|
||||||
assert SuperchatCalculator().process([chat_component])=={}
|
assert SuperchatCalculator().process([chat_component])=={}
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ def test_process_2():
|
|||||||
chat_component = {
|
chat_component = {
|
||||||
'video_id':'',
|
'video_id':'',
|
||||||
'timeout':10,
|
'timeout':10,
|
||||||
'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json")
|
'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
|
||||||
}
|
}
|
||||||
assert False
|
assert False
|
||||||
SuperchatCalculator().process([chat_component])
|
SuperchatCalculator().process([chat_component])
|
||||||
|
|||||||
Reference in New Issue
Block a user