diff --git a/README.md b/README.md index 0f7bcea..694ad8a 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat. pytchat is a python library for fetching youtube live chat without using youtube api, Selenium or BeautifulSoup. -pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。 +pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。 Other features: -+ Customizable chat data processors including youtube api compatible one. ++ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one. + Available on asyncio context. + Quick fetching of initial chat data by generating continuation params instead of web scraping. diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 390f047..099bf9f 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,7 +2,7 @@ pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.0.6.5' +__version__ = '0.0.6.6' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 0bd99b3..c3c16ac 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -22,9 +22,10 @@ def main(): # Arguments parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, - help='Video IDs separated by commas without space') + help='Video IDs separated by commas without space.\n' + 'If ID starts with a hyphen (-), enclose the ID in square brackets.') parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, - help='Output directory (end with "/")', default='./') + help='Output directory (end with "/"). 
default="./"', default='./') parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', help='Settings version') Arguments(parser.parse_args().__dict__) @@ -43,11 +44,17 @@ def main(): f" video_id: {video_id}\n" f" channel: {info.get_channel_name()}\n" f" title: {info.get_title()}") + path = Path(Arguments().output+video_id+'.html') + print(f"output path: {path.resolve()}") Extractor(video_id, - processor = HTMLArchiver(Arguments().output+video_id+'.html') + processor = HTMLArchiver(Arguments().output+video_id+'.html'), + callback = _disp_progress ).extract() - print("Extraction end.\n") + print("\nExtraction end.\n") except (InvalidVideoIdException, NoContentsException) as e: print(e) return parser.print_help() + +def _disp_progress(a,b): + print('.',end="",flush=True) diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index cffb955..11e5ba1 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -1,46 +1,52 @@ class ChatParseException(Exception): ''' - チャットデータをパースするライブラリが投げる例外の基底クラス + Base exception thrown by the parser ''' pass class NoYtinitialdataException(ChatParseException): ''' - 配信ページ内にチャットデータurlが見つからないときに投げる例外 + Thrown when the video is not found. ''' pass class ResponseContextError(ChatParseException): ''' - 配信ページでチャットデータ無効の時に投げる例外 + Thrown when chat data is invalid. ''' pass class NoLivechatRendererException(ChatParseException): ''' - チャットデータのJSON中にlivechatRendererがない時に投げる例外 + Thrown when livechatRenderer is missing in JSON. ''' pass class NoContentsException(ChatParseException): ''' - チャットデータのJSON中にContinuationContentsがない時に投げる例外 + Thrown when ContinuationContents is missing in JSON. ''' pass class NoContinuationsException(ChatParseException): ''' - チャットデータのContinuationContents中にcontinuationがない時に投げる例外 + Thrown when continuation is missing in ContinuationContents. 
''' pass class IllegalFunctionCall(Exception): ''' - set_callback()を実行済みにもかかわらず - get()を呼び出した場合の例外 + Thrown when get() is called even though + set_callback() has been executed. ''' pass class InvalidVideoIdException(Exception): + ''' + Thrown when the video_id does not exist (VideoInfo). + ''' pass + +class UnknownConnectionError(Exception): + pass \ No newline at end of file diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 45626ab..9249cf4 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") fmt_headers = ['datetime','elapsed','authorName','message','superchat' ,'type','authorChannel'] +HEADER_HTML = ''' + + +''' + class HTMLArchiver(ChatProcessor): ''' HtmlArchiver saves chat data as HTML table format. @@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor): super().__init__() self.save_path = self._checkpath(save_path) with open(self.save_path, mode='a', encoding = 'utf-8') as f: + f.write(HEADER_HTML) f.write('') f.writelines(self._parse_html_header(fmt_headers)) self.processor = DefaultProcessor() diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py index c361754..084f037 100644 --- a/pytchat/tool/extract/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -7,12 +7,15 @@ from . worker import ExtractWorker from . patch import Patch from ... import config from ... paramgen import arcparam +from ... 
exceptions import UnknownConnectionError from concurrent.futures import CancelledError +from json import JSONDecodeError from urllib.parse import quote headers = config.headers REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ "get_live_chat_replay?continuation=" +MAX_RETRY_COUNT = 3 def _split(start, end, count, min_interval_sec = 120): """ @@ -53,13 +56,22 @@ def ready_blocks(video_id, duration, div, callback): tasks = [_create_block(session, video_id, seektime, callback) for seektime in _split(-1, duration, div)] return await asyncio.gather(*tasks) - + async def _create_block(session, video_id, seektime, callback): continuation = arcparam.getparam(video_id, seektime = seektime) url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" - async with session.get(url, headers = headers) as resp: - text = await resp.text() - next_continuation, actions = parser.parse(json.loads(text)) + for _ in range(MAX_RETRY_COUNT): + try : + async with session.get(url, headers = headers) as resp: + text = await resp.text() + next_continuation, actions = parser.parse(json.loads(text)) + break + except JSONDecodeError: + await asyncio.sleep(3) + else: + cancel() + raise UnknownConnectionError("Abort: Unknown connection error.") + if actions: first = parser.get_offset(actions[0]) last = parser.get_offset(actions[-1]) @@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback): first = first, last = last ) + """ fetch initial blocks. 
""" @@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id): async def _fetch(continuation,session) -> Patch: url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" - async with session.get(url,headers = config.headers) as resp: - chat_json = await resp.text() - continuation, actions = parser.parse(json.loads(chat_json)) + for _ in range(MAX_RETRY_COUNT): + try: + async with session.get(url,headers = config.headers) as resp: + chat_json = await resp.text() + continuation, actions = parser.parse(json.loads(chat_json)) + break + except JSONDecodeError: + await asyncio.sleep(3) + else: + cancel() + raise UnknownConnectionError("Abort: Unknown connection error.") + if actions: last = parser.get_offset(actions[-1]) first = parser.get_offset(actions[0]) @@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id): callback(actions, last - first) return Patch(actions, continuation, first, last) return Patch(continuation = continuation) + """ allocate workers and assign blocks. """ diff --git a/tests/test_calculator_parse.py b/tests/test_calculator_parse.py index f845182..be0c1bf 100644 --- a/tests/test_calculator_parse.py +++ b/tests/test_calculator_parse.py @@ -36,7 +36,7 @@ def test_process_0(): chat_component = { 'video_id':'', 'timeout':10, - 'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json") + 'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json") } assert SuperchatCalculator().process([chat_component])=={'¥': 6800.0, '€': 2.0} @@ -47,7 +47,7 @@ def test_process_1(): chat_component = { 'video_id':'', 'timeout':10, - 'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json") + 'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json") } assert SuperchatCalculator().process([chat_component])=={} @@ -59,7 +59,7 @@ def test_process_2(): chat_component = { 'video_id':'', 'timeout':10, - 'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json") + 
'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json") } assert False SuperchatCalculator().process([chat_component])