Compare commits

...

5 Commits

Author SHA1 Message Date
taizan-hokuto
3243d69d7a Merge branch 'hotfix/json_decode_error' 2020-03-14 09:43:37 +09:00
taizan-hokuto
6e1b735ebc Increment version 2020-03-14 09:42:53 +09:00
taizan-hokuto
c54481dad5 Add header html and show progress 2020-03-14 09:26:28 +09:00
taizan-hokuto
78604c84d4 Fix testdata path separator 2020-03-14 08:16:19 +09:00
taizan-hokuto
21d93613a2 Handling JSONDecodeError 2020-03-14 08:00:31 +09:00
7 changed files with 67 additions and 25 deletions

View File

@@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat.
pytchat is a python library for fetching youtube live chat
without using youtube api, Selenium or BeautifulSoup.
pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。
pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
Other features:
+ Customizable chat data processors including youtube api compatible one.
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including a YouTube API compatible one.
+ Available on asyncio context.
+ Quick fetching of initial chat data by generating continuation params
instead of web scraping.

View File

@@ -2,7 +2,7 @@
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.0.6.5'
__version__ = '0.0.6.6'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -22,9 +22,10 @@ def main():
# Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
help='Video IDs separated by commas without space')
help='Video IDs separated by commas without space.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/")', default='./')
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version')
Arguments(parser.parse_args().__dict__)
@@ -43,11 +44,17 @@ def main():
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
path = Path(Arguments().output+video_id+'.html')
print(f"output path: {path.resolve()}")
Extractor(video_id,
processor = HTMLArchiver(Arguments().output+video_id+'.html')
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
callback = _disp_progress
).extract()
print("Extraction end.\n")
print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContentsException) as e:
print(e)
return
parser.print_help()
def _disp_progress(a,b):
print('.',end="",flush=True)

View File

@@ -1,46 +1,52 @@
class ChatParseException(Exception):
'''
チャットデータをパースするライブラリが投げる例外の基底クラス
Base exception thrown by the parser
'''
pass
class NoYtinitialdataException(ChatParseException):
'''
配信ページ内にチャットデータurlが見つからないときに投げる例外
Thrown when the video is not found.
'''
pass
class ResponseContextError(ChatParseException):
'''
配信ページでチャットデータ無効の時に投げる例外
Thrown when chat data is invalid.
'''
pass
class NoLivechatRendererException(ChatParseException):
'''
チャットデータのJSON中にlivechatRendererがない時に投げる例外
Thrown when livechatRenderer is missing in JSON.
'''
pass
class NoContentsException(ChatParseException):
'''
チャットデータのJSON中にContinuationContentsがない時に投げる例外
Thrown when ContinuationContents is missing in JSON.
'''
pass
class NoContinuationsException(ChatParseException):
'''
チャットデータのContinuationContents中にcontinuationがない時に投げる例外
Thrown when continuation is missing in ContinuationContents.
'''
pass
class IllegalFunctionCall(Exception):
'''
set_callback()を実行済みにもかかわらず
get()を呼び出した場合の例外
Thrown when get() is called even though
set_callback() has been executed.
'''
pass
class InvalidVideoIdException(Exception):
'''
Thrown when the video_id does not exist (VideoInfo).
'''
pass
class UnknownConnectionError(Exception):
pass

View File

@@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
,'type','authorChannel']
HEADER_HTML = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
'''
class HTMLArchiver(ChatProcessor):
'''
HtmlArchiver saves chat data as HTML table format.
@@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor):
super().__init__()
self.save_path = self._checkpath(save_path)
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
f.write(HEADER_HTML)
f.write('<table border="1" style="border-collapse: collapse">')
f.writelines(self._parse_html_header(fmt_headers))
self.processor = DefaultProcessor()

View File

@@ -7,12 +7,15 @@ from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError
from json import JSONDecodeError
from urllib.parse import quote
headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
def _split(start, end, count, min_interval_sec = 120):
"""
@@ -53,13 +56,22 @@ def ready_blocks(video_id, duration, div, callback):
tasks = [_create_block(session, video_id, seektime, callback)
for seektime in _split(-1, duration, div)]
return await asyncio.gather(*tasks)
async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime = seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
async with session.get(url, headers = headers) as resp:
text = await resp.text()
next_continuation, actions = parser.parse(json.loads(text))
for _ in range(MAX_RETRY_COUNT):
try :
async with session.get(url, headers = headers) as resp:
text = await resp.text()
next_continuation, actions = parser.parse(json.loads(text))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions:
first = parser.get_offset(actions[0])
last = parser.get_offset(actions[-1])
@@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback):
first = first,
last = last
)
"""
fetch initial blocks.
"""
@@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id):
async def _fetch(continuation,session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
async with session.get(url,headers = config.headers) as resp:
chat_json = await resp.text()
continuation, actions = parser.parse(json.loads(chat_json))
for _ in range(MAX_RETRY_COUNT):
try:
async with session.get(url,headers = config.headers) as resp:
chat_json = await resp.text()
continuation, actions = parser.parse(json.loads(chat_json))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions:
last = parser.get_offset(actions[-1])
first = parser.get_offset(actions[0])
@@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id):
callback(actions, last - first)
return Patch(actions, continuation, first, last)
return Patch(continuation = continuation)
"""
allocate workers and assign blocks.
"""

View File

@@ -36,7 +36,7 @@ def test_process_0():
chat_component = {
'video_id':'',
'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json")
'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json")
}
assert SuperchatCalculator().process([chat_component])=={'': 6800.0, '': 2.0}
@@ -47,7 +47,7 @@ def test_process_1():
chat_component = {
'video_id':'',
'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json")
'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json")
}
assert SuperchatCalculator().process([chat_component])=={}
@@ -59,7 +59,7 @@ def test_process_2():
chat_component = {
'video_id':'',
'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json")
'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
}
assert False
SuperchatCalculator().process([chat_component])