Compare commits

...

7 Commits

Author SHA1 Message Date
taizan-hokuto
f77a2c889b Merge branch 'hotfix/not_quit' 2020-09-12 00:57:48 +09:00
taizan-hokuto
47d5ab288f Increment version 2020-09-12 00:49:37 +09:00
taizan-hokuto
5f53fd24dd Format 2020-09-12 00:48:40 +09:00
taizan-hokuto
11a9d0e2d7 Fix a problem with extraction not completing 2020-09-12 00:42:30 +09:00
taizan-hokuto
480c9e15b8 Merge branch 'hotfix/continue_error' 2020-09-11 00:21:07 +09:00
taizan-hokuto
35aa7636f6 Increment version 2020-09-11 00:20:24 +09:00
taizan-hokuto
8fee67c2d4 Fix handling video info error 2020-09-11 00:18:09 +09:00
5 changed files with 36 additions and 16 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.1'
__version__ = '0.2.3'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -2,6 +2,7 @@ import argparse
import os
import signal
import time
from json.decoder import JSONDecodeError
from pathlib import Path
from .arguments import Arguments
@@ -55,7 +56,21 @@ def main():
path = Path(Arguments().output + video_id + '.html')
else:
raise FileNotFoundError
info = VideoInfo(video_id)
err = None
for _ in range(3): # retry 3 times
try:
info = VideoInfo(video_id)
break
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
err = e
time.sleep(2)
continue
else:
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(err.doc, "ERR", ".dat")
continue
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
print(f"\n"
@@ -66,7 +81,7 @@ def main():
print(f" output path: {path.resolve()}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id,
ex = Extractor(video_id,
callback=pbar._disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
@@ -86,8 +101,6 @@ def main():
print()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except NoContents as e:
@@ -96,14 +109,9 @@ def main():
print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
print("JSONDecodeError.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return

View File

@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
'''
Thrown when the video_id is not exist (VideoInfo).
'''
pass
def __init__(self, doc):
self.msg = "InvalidVideoIdException"
self.doc = doc
class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):
class RetryExceedMaxCount(Exception):
'''
thrown when the number of retries exceeds the maximum value.
Thrown when the number of retries exceeds the maximum value.
'''
pass
@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):
class VideoInfoParseError(Exception):
'''
thrown when failed to parse video info
Base exception when parsing video info.
'''
class PatternUnmatchError(VideoInfoParseError):
'''
thrown when failed to parse video info with unmatched pattern
Thrown when failed to parse video info with unmatched pattern.
'''
def __init__(self, doc):
self.msg = "PatternUnmatchError"

View File

@@ -16,6 +16,9 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
# Set to avoid duplicate parameters
param_set = set()
def _split(start, end, count, min_interval_sec=120):
"""
@@ -64,6 +67,10 @@ def ready_blocks(video_id, duration, div, callback):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
if continuation in param_set:
next_continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=headers)
next_continuation, actions = parser.parse(resp.json())
break
@@ -112,6 +119,10 @@ def fetch_patch(callback, blocks, video_id):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
if continuation in param_set:
continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=config.headers)
continuation, actions = parser.parse(resp.json())
break

View File

@@ -7,7 +7,6 @@ from typing import Tuple
class ExtractWorker:
"""
ExtractWorker associates a download session with a block.
When the worker finishes fetching, the block
being fetched is splitted and assigned the free worker.