Merge branch 'hotfix/network'

Increment version
Fix handling network error
2020-09-14 00:40:40 +09:00 · 2020-09-14 00:29:21 +09:00 · 2020-09-14 00:28:41 +09:00 · 2020-09-12 02:12:46 +09:00 · 2020-09-12 02:02:07 +09:00 · 2020-09-12 01:57:55 +09:00
8 changed files with 72 additions and 17 deletions
--- a/pytchat/init.py
+++ b/pytchat/init.py
@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__    = 'Copyright (C) 2019 taizan-hokuto'
-__version__      = '0.2.2'
+__version__      = '0.2.5'
 __license__      = 'MIT'
 __author__       = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
--- a/pytchat/cli/init.py
+++ b/pytchat/cli/init.py
@@ -5,9 +5,10 @@ import signal
 import time
 from json.decoder import JSONDecodeError
 from pathlib import Path
+from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
 from .arguments import Arguments
 from .progressbar import ProgressBar
-from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
+from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
 from .. processors.html_archiver import HTMLArchiver
 from .. tool.extract.extractor import Extractor
 from .. tool.videoinfo import VideoInfo
@@ -50,6 +51,9 @@ def main():
    for counter, video_id in enumerate(Arguments().video_ids):
        if '[' in video_id:
            video_id = video_id.replace('[', '').replace(']', '')
+        if len(Arguments().video_ids) > 1:
+            print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
+
        try:
            video_id = extract_video_id(video_id)
            if os.path.exists(Arguments().output):
@@ -57,8 +61,8 @@ def main():
            else:
                raise FileNotFoundError
            err = None
-            for _ in range(3): # retry 3 times
-                try:                
+            for _ in range(3):  # retry 3 times
+                try:
                    info = VideoInfo(video_id)
                    break
                except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
@@ -71,8 +75,6 @@ def main():
                    util.save(err.doc, "ERR", ".dat")
                continue

-            if len(Arguments().video_ids) > 1:
-                print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
            print(f"\n"
                  f" video_id: {video_id}\n"
                  f" channel:  {info.get_channel_name()}\n"
@@ -81,7 +83,7 @@ def main():
            print(f" output path: {path.resolve()}")
            duration = info.get_duration()
            pbar = ProgressBar(total=(duration * 1000), status="Extracting")
-            ex = Extractor(video_id,               
+            ex = Extractor(video_id,
                    callback=pbar._disp,
                    div=10)
            signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
@@ -112,6 +114,12 @@ def main():
            print("JSONDecodeError.:{}".format(video_id))
            if Arguments().save_error_data:
                util.save(e.doc, "ERR_JSON_DECODE", ".dat")
+        except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
+            print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
+        except PatternUnmatchError:
+            print(f"PatternUnmatchError [{video_id}]. ")
+        except Exception as e:
+            print(type(e), str(e))

    return

--- a/pytchat/exceptions.py
+++ b/pytchat/exceptions.py
@@ -43,7 +43,6 @@ class InvalidVideoIdException(Exception):
        self.doc = doc


-
 class UnknownConnectionError(Exception):
    pass

--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -1,9 +1,12 @@
+import httpx
 import os
 import re
-import httpx
+import time
 from base64 import standard_b64encode
+from httpx import NetworkError, ReadTimeout
 from .chat_processor import ChatProcessor
 from .default.processor import DefaultProcessor
+from ..exceptions import UnknownConnectionError


 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -112,7 +115,17 @@ class HTMLArchiver(ChatProcessor):
                       for item in message_items)

    def _encode_img(self, url):
-        resp = httpx.get(url)
+        err = None
+        for _ in range(3):
+            try:
+                resp = httpx.get(url)
+                break
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
+
        return standard_b64encode(resp.content).decode()

    def _set_emoji_table(self, item: dict):
--- a/pytchat/tool/extract/asyncdl.py
+++ b/pytchat/tool/extract/asyncdl.py
@@ -8,14 +8,19 @@ from ... import config
 from ... paramgen import arcparam
 from ... exceptions import UnknownConnectionError
 from concurrent.futures import CancelledError
+from httpx import NetworkError, ReadTimeout
 from json import JSONDecodeError
 from urllib.parse import quote

+
 headers = config.headers
 REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
             "get_live_chat_replay?continuation="
 MAX_RETRY_COUNT = 3

+# Set to avoid duplicate parameters
+param_set = set()
+

 def _split(start, end, count, min_interval_sec=120):
    """
@@ -50,6 +55,7 @@ def _split(start, end, count, min_interval_sec=120):


 def ready_blocks(video_id, duration, div, callback):
+    param_set.clear()
    if div <= 0:
        raise ValueError

@@ -62,16 +68,24 @@ def ready_blocks(video_id, duration, div, callback):
    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
        for _ in range(MAX_RETRY_COUNT):
            try:
+                if continuation in param_set:
+                    next_continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                resp = await session.get(url, headers=headers)
                next_continuation, actions = parser.parse(resp.json())
                break
            except JSONDecodeError:
                await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
        else:
            cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

        if actions:
            first = parser.get_offset(actions[0])
@@ -110,16 +124,24 @@ def fetch_patch(callback, blocks, video_id):

    async def _fetch(continuation, session) -> Patch:
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
        for _ in range(MAX_RETRY_COUNT):
            try:
+                if continuation in param_set:
+                    continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                resp = await session.get(url, headers=config.headers)
                continuation, actions = parser.parse(resp.json())
                break
            except JSONDecodeError:
                await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
        else:
            cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

        if actions:
            last = parser.get_offset(actions[-1])
--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -93,4 +93,5 @@ class Extractor:
        return ret

    def cancel(self):
+        print("cancel")
        asyncdl.cancel()
--- a/pytchat/tool/extract/worker.py
+++ b/pytchat/tool/extract/worker.py
@@ -7,7 +7,6 @@ from typing import Tuple
 class ExtractWorker:
    """
    ExtractWorker associates a download session with a block.
-
    When the worker finishes fetching, the block
    being fetched is splitted and assigned the free worker.

--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -1,10 +1,13 @@
+import httpx
 import json
 import re
-import httpx
+import time
+from httpx import ConnectError, NetworkError
 from .. import config
-from ..exceptions import InvalidVideoIdException, PatternUnmatchError
+from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
 from ..util.extract_video_id import extract_video_id

+
 headers = config.headers

 pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
@@ -85,8 +88,18 @@ class VideoInfo:

    def _get_page_text(self, video_id):
        url = f"https://www.youtube.com/embed/{video_id}"
-        resp = httpx.get(url, headers=headers)
-        resp.raise_for_status()
+        err = None
+        for _ in range(3):
+            try:
+                resp = httpx.get(url, headers=headers)
+                resp.raise_for_status()
+                break
+            except (ConnectError, NetworkError) as e:
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
+
        return resp.text

    def _parse(self, text):
Author	SHA1	Message	Date
taizan-hokuto	e29b3b8377	Merge branch 'hotfix/network'	2020-09-14 00:40:40 +09:00
taizan-hokuto	0859ed5fb1	Increment version	2020-09-14 00:29:21 +09:00
taizan-hokuto	a80d5ba080	Fix handling network error	2020-09-14 00:28:41 +09:00
taizan-hokuto	b7e6043a71	Merge branch 'hotfix/memory'	2020-09-12 02:12:46 +09:00
taizan-hokuto	820ba35013	Increment version	2020-09-12 02:02:07 +09:00
taizan-hokuto	ecd2d130bf	Clear set each time the extraction changes	2020-09-12 01:57:55 +09:00
taizan-hokuto	f77a2c889b	Merge branch 'hotfix/not_quit'	2020-09-12 00:57:48 +09:00
taizan-hokuto	47d5ab288f	Increment version	2020-09-12 00:49:37 +09:00
taizan-hokuto	5f53fd24dd	Format	2020-09-12 00:48:40 +09:00
taizan-hokuto	11a9d0e2d7	Fix a problem with extraction not completing	2020-09-12 00:42:30 +09:00