diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 3dd4bc7..f999b04 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -5,9 +5,10 @@ import signal import time from json.decoder import JSONDecodeError from pathlib import Path +from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError from .arguments import Arguments from .progressbar import ProgressBar -from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError +from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError from .. processors.html_archiver import HTMLArchiver from .. tool.extract.extractor import Extractor from .. tool.videoinfo import VideoInfo @@ -50,6 +51,9 @@ def main(): for counter, video_id in enumerate(Arguments().video_ids): if '[' in video_id: video_id = video_id.replace('[', '').replace(']', '') + if len(Arguments().video_ids) > 1: + print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}") + try: video_id = extract_video_id(video_id) if os.path.exists(Arguments().output): @@ -71,8 +75,6 @@ def main(): util.save(err.doc, "ERR", ".dat") continue - if len(Arguments().video_ids) > 1: - print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}") print(f"\n" f" video_id: {video_id}\n" f" channel: {info.get_channel_name()}\n" @@ -112,6 +114,12 @@ def main(): print("JSONDecodeError.:{}".format(video_id)) if Arguments().save_error_data: util.save(e.doc, "ERR_JSON_DECODE", ".dat") + except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e: + print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e)) + except PatternUnmatchError: + print(f"PatternUnmatchError [{video_id}]. ") + except Exception as e: + print(type(e), str(e)) return diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 39cff09..9e08c40 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -1,9 +1,12 @@ +import httpx import os import re -import httpx +import time from base64 import standard_b64encode +from httpx import NetworkError, ReadTimeout from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor +from ..exceptions import UnknownConnectionError PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") @@ -112,7 +115,17 @@ class HTMLArchiver(ChatProcessor): for item in message_items) def _encode_img(self, url): - resp = httpx.get(url) + err = None + for _ in range(3): + try: + resp = httpx.get(url) + break + except (NetworkError, ReadTimeout) as e: + err = e + time.sleep(3) + else: + raise UnknownConnectionError(str(err)) + return standard_b64encode(resp.content).decode() def _set_emoji_table(self, item: dict): diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py index 0a545e7..7dfb1be 100644 --- a/pytchat/tool/extract/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -8,9 +8,11 @@ from ... import config from ... paramgen import arcparam from ... exceptions import UnknownConnectionError from concurrent.futures import CancelledError +from httpx import NetworkError, ReadTimeout from json import JSONDecodeError from urllib.parse import quote + headers = config.headers REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ "get_live_chat_replay?continuation=" @@ -66,6 +68,7 @@ def ready_blocks(video_id, duration, div, callback): async def _create_block(session, video_id, seektime, callback): continuation = arcparam.getparam(video_id, seektime=seektime) url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" + err = None for _ in range(MAX_RETRY_COUNT): try: if continuation in param_set: @@ -77,9 +80,12 @@ def ready_blocks(video_id, duration, div, callback): break except JSONDecodeError: await asyncio.sleep(3) + except (NetworkError, ReadTimeout) as e: + err = e + await asyncio.sleep(3) else: cancel() - raise UnknownConnectionError("Abort: Unknown connection error.") + raise UnknownConnectionError("Abort:" + str(err)) if actions: first = parser.get_offset(actions[0]) @@ -118,6 +124,7 @@ def fetch_patch(callback, blocks, video_id): async def _fetch(continuation, session) -> Patch: url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" + err = None for _ in range(MAX_RETRY_COUNT): try: if continuation in param_set: @@ -129,9 +136,12 @@ def fetch_patch(callback, blocks, video_id): break except JSONDecodeError: await asyncio.sleep(3) + except (NetworkError, ReadTimeout) as e: + err = e + await asyncio.sleep(3) else: cancel() - raise UnknownConnectionError("Abort: Unknown connection error.") + raise UnknownConnectionError("Abort:" + str(err)) if actions: last = parser.get_offset(actions[-1]) diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py index 4647f9d..bf32d59 100644 --- a/pytchat/tool/extract/extractor.py +++ b/pytchat/tool/extract/extractor.py @@ -93,4 +93,5 @@ class Extractor: return ret def cancel(self): + print("cancel") asyncdl.cancel() diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index a6fb117..e1d1d0a 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -1,10 +1,13 @@ +import httpx import json import re -import httpx +import time +from httpx import ConnectError, NetworkError from .. import config -from ..exceptions import InvalidVideoIdException, PatternUnmatchError +from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError from ..util.extract_video_id import extract_video_id + headers = config.headers pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})") @@ -85,8 +88,18 @@ class VideoInfo: def _get_page_text(self, video_id): url = f"https://www.youtube.com/embed/{video_id}" - resp = httpx.get(url, headers=headers) - resp.raise_for_status() + err = None + for _ in range(3): + try: + resp = httpx.get(url, headers=headers) + resp.raise_for_status() + break + except (ConnectError, NetworkError) as e: + err = e + time.sleep(3) + else: + raise UnknownConnectionError(str(err)) + return resp.text def _parse(self, text):