Compare commits

...

7 Commits

Author SHA1 Message Date
taizan-hokuto  f77a2c889b  Merge branch 'hotfix/not_quit'  2020-09-12 00:57:48 +09:00
taizan-hokuto  47d5ab288f  Increment version  2020-09-12 00:49:37 +09:00
taizan-hokuto  5f53fd24dd  Format  2020-09-12 00:48:40 +09:00
taizan-hokuto  11a9d0e2d7  Fix a problem with extraction not completing  2020-09-12 00:42:30 +09:00
taizan-hokuto  480c9e15b8  Merge branch 'hotfix/continue_error'  2020-09-11 00:21:07 +09:00
taizan-hokuto  35aa7636f6  Increment version  2020-09-11 00:20:24 +09:00
taizan-hokuto  8fee67c2d4  Fix handling video info error  2020-09-11 00:18:09 +09:00
5 changed files with 36 additions and 16 deletions

View File

@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
-__version__ = '0.2.1'
+__version__ = '0.2.3'
 __license__ = 'MIT'
 __author__ = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -2,6 +2,7 @@ import argparse
 import os
 import signal
+import time
 from json.decoder import JSONDecodeError
 from pathlib import Path
 from .arguments import Arguments
@@ -55,7 +56,21 @@ def main():
                 path = Path(Arguments().output + video_id + '.html')
             else:
                 raise FileNotFoundError
-            info = VideoInfo(video_id)
+            err = None
+            for _ in range(3):  # retry 3 times
+                try:
+                    info = VideoInfo(video_id)
+                    break
+                except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
+                    err = e
+                    time.sleep(2)
+                    continue
+            else:
+                print("Cannot parse video information.:{}".format(video_id))
+                if Arguments().save_error_data:
+                    util.save(err.doc, "ERR", ".dat")
+                continue
             if len(Arguments().video_ids) > 1:
                 print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
             print(f"\n"
@@ -86,8 +101,6 @@ def main():
             print()
             if pbar.is_cancelled():
                 print("\nThe extraction process has been discontinued.\n")
         except InvalidVideoIdException:
             print("Invalid Video ID or URL:", video_id)
         except NoContents as e:
@@ -96,14 +109,9 @@ def main():
print("The specified directory does not exist.:{}".format(Arguments().output)) print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e: except JSONDecodeError as e:
print(e.msg) print(e.msg)
print("Cannot parse video information.:{}".format(video_id)) print("JSONDecodeError.:{}".format(video_id))
if Arguments().save_error_data: if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat") util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return return
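
Note: the retry block added above relies on Python's for/else, where the else branch runs only if the loop finishes all attempts without hitting break. A minimal, self-contained sketch of the same pattern under assumed names (fetch_info_with_retry and fetch are illustrative, not part of pytchat's API):

import time

MAX_ATTEMPTS = 3   # same retry count as in the diff above
WAIT_SEC = 2       # back-off between attempts, mirroring time.sleep(2)

def fetch_info_with_retry(video_id, fetch):
    # `fetch` is an illustrative stand-in for VideoInfo(video_id).
    err = None
    for _ in range(MAX_ATTEMPTS):
        try:
            info = fetch(video_id)
            break                      # success: skip the else clause below
        except ValueError as e:        # the real code catches its own exception types
            err = e
            time.sleep(WAIT_SEC)
    else:
        # runs only when the loop completed without break, i.e. every attempt failed
        raise err
    return info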

View File

@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
     '''
     Thrown when the video_id is not exist (VideoInfo).
     '''
-    pass
+    def __init__(self, doc):
+        self.msg = "InvalidVideoIdException"
+        self.doc = doc


 class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):
 class RetryExceedMaxCount(Exception):
     '''
-    thrown when the number of retries exceeds the maximum value.
+    Thrown when the number of retries exceeds the maximum value.
     '''
     pass
@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):
 class VideoInfoParseError(Exception):
     '''
-    thrown when failed to parse video info
+    Base exception when parsing video info.
     '''


 class PatternUnmatchError(VideoInfoParseError):
     '''
-    thrown when failed to parse video info with unmatched pattern
+    Thrown when failed to parse video info with unmatched pattern.
     '''
     def __init__(self, doc):
         self.msg = "PatternUnmatchError"

View File

@@ -16,6 +16,9 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation=" "get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3 MAX_RETRY_COUNT = 3
# Set to avoid duplicate parameters
param_set = set()
def _split(start, end, count, min_interval_sec=120): def _split(start, end, count, min_interval_sec=120):
""" """
@@ -64,6 +67,10 @@ def ready_blocks(video_id, duration, div, callback):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT): for _ in range(MAX_RETRY_COUNT):
try: try:
if continuation in param_set:
next_continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=headers) resp = await session.get(url, headers=headers)
next_continuation, actions = parser.parse(resp.json()) next_continuation, actions = parser.parse(resp.json())
break break
@@ -112,6 +119,10 @@ def fetch_patch(callback, blocks, video_id):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT): for _ in range(MAX_RETRY_COUNT):
try: try:
if continuation in param_set:
continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=config.headers) resp = await session.get(url, headers=config.headers)
continuation, actions = parser.parse(resp.json()) continuation, actions = parser.parse(resp.json())
break break
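
The module-level param_set records every continuation token that has already been requested; when a token repeats, the fetch loop stops instead of requesting the same chunk again, which is what kept the extraction from completing. A minimal sketch of the same guard, assuming a hypothetical get_chunk fetcher in place of session.get plus parser.parse:

seen_continuations = set()   # mirrors param_set in the diff above

def drain(first_continuation, get_chunk):
    # Follow the chain of continuation tokens, refusing to fetch
    # any token that has already been requested.
    continuation, collected = first_continuation, []
    while continuation:
        if continuation in seen_continuations:
            break                              # duplicate token: stop instead of looping forever
        seen_continuations.add(continuation)
        continuation, actions = get_chunk(continuation)
        collected.extend(actions)
    return collected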

View File

@@ -7,7 +7,6 @@ from typing import Tuple
 class ExtractWorker:
     """
     ExtractWorker associates a download session with a block.
-
     When the worker finishes fetching, the block
     being fetched is splitted and assigned the free worker.