From 11a9d0e2d7914767b987488a49d395a58d9e0fac Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sat, 12 Sep 2020 00:42:30 +0900 Subject: [PATCH 1/3] Fix a problem with extraction not completing --- pytchat/tool/extract/asyncdl.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py index 55ee1dd..c04b687 100644 --- a/pytchat/tool/extract/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -16,6 +16,9 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ "get_live_chat_replay?continuation=" MAX_RETRY_COUNT = 3 +# Set to avoid duplicate parameters +param_set = set() + def _split(start, end, count, min_interval_sec=120): """ @@ -64,6 +67,10 @@ def ready_blocks(video_id, duration, div, callback): url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" for _ in range(MAX_RETRY_COUNT): try: + if continuation in param_set: + next_continuation, actions = None, [] + break + param_set.add(continuation) resp = await session.get(url, headers=headers) next_continuation, actions = parser.parse(resp.json()) break @@ -112,6 +119,10 @@ def fetch_patch(callback, blocks, video_id): url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" for _ in range(MAX_RETRY_COUNT): try: + if continuation in param_set: + continuation, actions = None, [] + break + param_set.add(continuation) resp = await session.get(url, headers=config.headers) continuation, actions = parser.parse(resp.json()) break From 5f53fd24dd213d7a0a3602235991d7b1c2bbb7b1 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sat, 12 Sep 2020 00:48:40 +0900 Subject: [PATCH 2/3] Format --- pytchat/cli/__init__.py | 6 +++--- pytchat/exceptions.py | 1 - pytchat/tool/extract/worker.py | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 213f8e7..3dd4bc7 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -57,8 +57,8 @@ def main(): else: raise FileNotFoundError err = None - for _ in range(3): # retry 3 times - try: + for _ in range(3): # retry 3 times + try: info = VideoInfo(video_id) break except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e: @@ -81,7 +81,7 @@ def main(): print(f" output path: {path.resolve()}") duration = info.get_duration() pbar = ProgressBar(total=(duration * 1000), status="Extracting") - ex = Extractor(video_id, + ex = Extractor(video_id, callback=pbar._disp, div=10) signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar))) diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index 9e498d1..1bd918c 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -43,7 +43,6 @@ class InvalidVideoIdException(Exception): self.doc = doc - class UnknownConnectionError(Exception): pass diff --git a/pytchat/tool/extract/worker.py b/pytchat/tool/extract/worker.py index f48fc03..261de10 100644 --- a/pytchat/tool/extract/worker.py +++ b/pytchat/tool/extract/worker.py @@ -7,7 +7,6 @@ from typing import Tuple class ExtractWorker: """ ExtractWorker associates a download session with a block. - When the worker finishes fetching, the block being fetched is splitted and assigned the free worker. From 47d5ab288f56991bda54539c0ce4c7a27eecc20a Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sat, 12 Sep 2020 00:49:37 +0900 Subject: [PATCH 3/3] Increment version --- pytchat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 05f72f2..d355558 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,7 +2,7 @@ pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.2.2' +__version__ = '0.2.3' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'