Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7e6043a71 | ||
|
|
820ba35013 | ||
|
|
ecd2d130bf | ||
|
|
f77a2c889b | ||
|
|
47d5ab288f | ||
|
|
5f53fd24dd | ||
|
|
11a9d0e2d7 | ||
|
|
480c9e15b8 | ||
|
|
35aa7636f6 | ||
|
|
8fee67c2d4 |
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.2.1'
|
__version__ = '0.2.4'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import argparse
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
|
import time
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from .arguments import Arguments
|
from .arguments import Arguments
|
||||||
@@ -55,7 +56,21 @@ def main():
|
|||||||
path = Path(Arguments().output + video_id + '.html')
|
path = Path(Arguments().output + video_id + '.html')
|
||||||
else:
|
else:
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
info = VideoInfo(video_id)
|
err = None
|
||||||
|
for _ in range(3): # retry 3 times
|
||||||
|
try:
|
||||||
|
info = VideoInfo(video_id)
|
||||||
|
break
|
||||||
|
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
|
||||||
|
err = e
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print("Cannot parse video information.:{}".format(video_id))
|
||||||
|
if Arguments().save_error_data:
|
||||||
|
util.save(err.doc, "ERR", ".dat")
|
||||||
|
continue
|
||||||
|
|
||||||
if len(Arguments().video_ids) > 1:
|
if len(Arguments().video_ids) > 1:
|
||||||
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
||||||
print(f"\n"
|
print(f"\n"
|
||||||
@@ -66,7 +81,7 @@ def main():
|
|||||||
print(f" output path: {path.resolve()}")
|
print(f" output path: {path.resolve()}")
|
||||||
duration = info.get_duration()
|
duration = info.get_duration()
|
||||||
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
||||||
ex = Extractor(video_id,
|
ex = Extractor(video_id,
|
||||||
callback=pbar._disp,
|
callback=pbar._disp,
|
||||||
div=10)
|
div=10)
|
||||||
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
||||||
@@ -86,8 +101,6 @@ def main():
|
|||||||
print()
|
print()
|
||||||
if pbar.is_cancelled():
|
if pbar.is_cancelled():
|
||||||
print("\nThe extraction process has been discontinued.\n")
|
print("\nThe extraction process has been discontinued.\n")
|
||||||
|
|
||||||
|
|
||||||
except InvalidVideoIdException:
|
except InvalidVideoIdException:
|
||||||
print("Invalid Video ID or URL:", video_id)
|
print("Invalid Video ID or URL:", video_id)
|
||||||
except NoContents as e:
|
except NoContents as e:
|
||||||
@@ -96,14 +109,9 @@ def main():
|
|||||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
print("The specified directory does not exist.:{}".format(Arguments().output))
|
||||||
except JSONDecodeError as e:
|
except JSONDecodeError as e:
|
||||||
print(e.msg)
|
print(e.msg)
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
print("JSONDecodeError.:{}".format(video_id))
|
||||||
if Arguments().save_error_data:
|
if Arguments().save_error_data:
|
||||||
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
||||||
except PatternUnmatchError as e:
|
|
||||||
print(e.msg)
|
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
|
||||||
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
|
|||||||
'''
|
'''
|
||||||
Thrown when the video_id is not exist (VideoInfo).
|
Thrown when the video_id is not exist (VideoInfo).
|
||||||
'''
|
'''
|
||||||
pass
|
def __init__(self, doc):
|
||||||
|
self.msg = "InvalidVideoIdException"
|
||||||
|
self.doc = doc
|
||||||
|
|
||||||
|
|
||||||
class UnknownConnectionError(Exception):
|
class UnknownConnectionError(Exception):
|
||||||
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):
|
|||||||
|
|
||||||
class RetryExceedMaxCount(Exception):
|
class RetryExceedMaxCount(Exception):
|
||||||
'''
|
'''
|
||||||
thrown when the number of retries exceeds the maximum value.
|
Thrown when the number of retries exceeds the maximum value.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):
|
|||||||
|
|
||||||
class VideoInfoParseError(Exception):
|
class VideoInfoParseError(Exception):
|
||||||
'''
|
'''
|
||||||
thrown when failed to parse video info
|
Base exception when parsing video info.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
class PatternUnmatchError(VideoInfoParseError):
|
class PatternUnmatchError(VideoInfoParseError):
|
||||||
'''
|
'''
|
||||||
thrown when failed to parse video info with unmatched pattern
|
Thrown when failed to parse video info with unmatched pattern.
|
||||||
'''
|
'''
|
||||||
def __init__(self, doc):
|
def __init__(self, doc):
|
||||||
self.msg = "PatternUnmatchError"
|
self.msg = "PatternUnmatchError"
|
||||||
|
|||||||
@@ -16,6 +16,9 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
|||||||
"get_live_chat_replay?continuation="
|
"get_live_chat_replay?continuation="
|
||||||
MAX_RETRY_COUNT = 3
|
MAX_RETRY_COUNT = 3
|
||||||
|
|
||||||
|
# Set to avoid duplicate parameters
|
||||||
|
param_set = set()
|
||||||
|
|
||||||
|
|
||||||
def _split(start, end, count, min_interval_sec=120):
|
def _split(start, end, count, min_interval_sec=120):
|
||||||
"""
|
"""
|
||||||
@@ -50,6 +53,7 @@ def _split(start, end, count, min_interval_sec=120):
|
|||||||
|
|
||||||
|
|
||||||
def ready_blocks(video_id, duration, div, callback):
|
def ready_blocks(video_id, duration, div, callback):
|
||||||
|
param_set.clear()
|
||||||
if div <= 0:
|
if div <= 0:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
@@ -64,6 +68,10 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
|
if continuation in param_set:
|
||||||
|
next_continuation, actions = None, []
|
||||||
|
break
|
||||||
|
param_set.add(continuation)
|
||||||
resp = await session.get(url, headers=headers)
|
resp = await session.get(url, headers=headers)
|
||||||
next_continuation, actions = parser.parse(resp.json())
|
next_continuation, actions = parser.parse(resp.json())
|
||||||
break
|
break
|
||||||
@@ -112,6 +120,10 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
|
if continuation in param_set:
|
||||||
|
continuation, actions = None, []
|
||||||
|
break
|
||||||
|
param_set.add(continuation)
|
||||||
resp = await session.get(url, headers=config.headers)
|
resp = await session.get(url, headers=config.headers)
|
||||||
continuation, actions = parser.parse(resp.json())
|
continuation, actions = parser.parse(resp.json())
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from typing import Tuple
|
|||||||
class ExtractWorker:
|
class ExtractWorker:
|
||||||
"""
|
"""
|
||||||
ExtractWorker associates a download session with a block.
|
ExtractWorker associates a download session with a block.
|
||||||
|
|
||||||
When the worker finishes fetching, the block
|
When the worker finishes fetching, the block
|
||||||
being fetched is splitted and assigned the free worker.
|
being fetched is splitted and assigned the free worker.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user