Compare commits


13 Commits

Author         SHA1        Message                                             Date
taizan-hokuto  f77a2c889b  Merge branch 'hotfix/not_quit'                      2020-09-12 00:57:48 +09:00
taizan-hokuto  47d5ab288f  Increment version                                   2020-09-12 00:49:37 +09:00
taizan-hokuto  5f53fd24dd  Format                                              2020-09-12 00:48:40 +09:00
taizan-hokuto  11a9d0e2d7  Fix a problem with extraction not completing        2020-09-12 00:42:30 +09:00
taizan-hokuto  480c9e15b8  Merge branch 'hotfix/continue_error'                2020-09-11 00:21:07 +09:00
taizan-hokuto  35aa7636f6  Increment version                                   2020-09-11 00:20:24 +09:00
taizan-hokuto  8fee67c2d4  Fix handling video info error                       2020-09-11 00:18:09 +09:00
taizan-hokuto  d3f1643a40  Merge branch 'release/v0.2.1'                       2020-09-09 22:23:01 +09:00
taizan-hokuto  eb29f27493  Increment version                                   2020-09-09 22:22:31 +09:00
taizan-hokuto  8adf75ab83  Merge branch 'feature/pbar' into develop            2020-09-09 22:20:36 +09:00
taizan-hokuto  2e05803d75  Remove unnecessary option                           2020-09-09 22:20:09 +09:00
taizan-hokuto  f16c0ee73a  Fix progress bar line feed and remove pbar option   2020-09-09 22:19:10 +09:00
taizan-hokuto  a338f2b782  Merge tag 'v0.2.0' into develop (tag: v0.2.0)       2020-09-07 23:35:45 +09:00
8 changed files with 51 additions and 90 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.0'
__version__ = '0.2.3'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -1,11 +1,12 @@
import argparse
import os
import sys
import signal
import time
from json.decoder import JSONDecodeError
from pathlib import Path
from .arguments import Arguments
from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
@@ -32,18 +33,12 @@ def main():
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.PBAR}', action='store_true',
help='Display rich progress bar')
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
help='Save error data when error occurs(".dat" file)')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version')
Arguments(parser.parse_args().__dict__)
if Arguments().pbar:
from .progressbar_rich import ProgressBar
else:
from .progressbar_simple import ProgressBar
if Arguments().print_version:
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
return
@@ -52,7 +47,7 @@ def main():
if not Arguments().video_ids:
parser.print_help()
return
for video_id in Arguments().video_ids:
for counter, video_id in enumerate(Arguments().video_ids):
if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '')
try:
@@ -61,8 +56,24 @@ def main():
path = Path(Arguments().output + video_id + '.html')
else:
raise FileNotFoundError
info = VideoInfo(video_id)
print(f"Extracting...\n"
err = None
for _ in range(3): # retry 3 times
try:
info = VideoInfo(video_id)
break
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
err = e
time.sleep(2)
continue
else:
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(err.doc, "ERR", ".dat")
continue
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
print(f"\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
@@ -70,17 +81,14 @@ def main():
print(f" output path: {path.resolve()}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id,
callback=pbar._disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
data = ex.extract()
if data == []:
return False
if Arguments().pbar:
pbar.reset("#", "=", total=len(data), status="Rendering ")
else:
pbar.reset("=", "", total=len(data), status="Rendering ")
pbar.reset("#", "=", total=len(data), status="Rendering ")
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
processor.process(
[{'video_id': None,
@@ -88,19 +96,11 @@ def main():
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
processor.finalize()
if Arguments().pbar:
pbar.reset('#', '#', status='Completed ')
pbar.close()
else:
pbar.close()
print("\nCompleted")
pbar.reset('#', '#', status='Completed ')
pbar.close()
print()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
return False
return True
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except NoContents as e:
@@ -109,14 +109,9 @@ def main():
print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
print("JSONDecodeError.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return
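
The retry block added above uses Python's for/else construct: the else branch runs only when the loop completes without a break, i.e. when all three attempts to build VideoInfo failed. A minimal standalone sketch of the same pattern, assuming a hypothetical fetch_video_info callable and MAX_ATTEMPTS name (neither is part of pytchat):

```python
import time

MAX_ATTEMPTS = 3  # illustrative constant, mirrors the three attempts in the CLI


def fetch_with_retry(video_id, fetch_video_info, delay_sec=2):
    """Try fetch_video_info up to MAX_ATTEMPTS times, sleeping between failures.

    Returns the fetched info, or None if every attempt raised.
    """
    last_error = None
    for _ in range(MAX_ATTEMPTS):
        try:
            info = fetch_video_info(video_id)
            break                      # success: the for/else `else` is skipped
        except Exception as err:       # the real CLI catches specific parse errors
            last_error = err
            time.sleep(delay_sec)      # brief back-off before the next attempt
    else:
        # Runs only if the loop never hit `break`, i.e. all attempts failed.
        print(f"Cannot parse video information.:{video_id} ({last_error})")
        return None
    return info
```

The CLI version narrows the except clause to PatternUnmatchError, JSONDecodeError and InvalidVideoIdException, and saves err.doc to a ".dat" file when --save_error_data is given.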

View File

@@ -19,7 +19,6 @@ class Arguments(metaclass=Singleton):
OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data'
PBAR: bool ='pbar'
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -37,7 +36,7 @@ class Arguments(metaclass=Singleton):
self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
self.pbar: bool = arguments[Arguments.Name.PBAR]
# Videos
if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id

View File

@@ -4,6 +4,7 @@ vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License)
'''
import shutil
import sys
@@ -13,8 +14,9 @@ class ProgressBar:
self._cancelled = False
self.reset(total=total, status=status)
self._blinker = 0
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
self._symbol_done = symbol_done
self._symbol_space = symbol_space
self._total = total
@@ -37,7 +39,9 @@ class ProgressBar:
bar = self._symbol_done * filled_len + \
self._symbol_space * (self._bar_len - filled_len)
sys.stdout.write(' [%s] %s%s ...%s \r' % (bar, percents, '%', self._status))
disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
sys.stdout.write(disp)
sys.stdout.flush()
self._blinker += 1
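
The rewritten display above reads the terminal width with shutil.get_terminal_size() and clips the rendered line before emitting the carriage return, which avoids the unwanted line feed that commit f16c0ee73a fixes. A rough sketch of that idea in isolation; the function name and parameters below are illustrative, not pytchat's API:

```python
import shutil
import sys


def draw_progress(done, total, status="", bar_len=60, symbol_done="=", symbol_space=" "):
    """Render one progress line, clipped to the terminal width before '\\r'."""
    columns = shutil.get_terminal_size(fallback=(80, 24)).columns
    filled = min(bar_len, int(round(bar_len * done / float(total)))) if total else 0
    bar = symbol_done * filled + symbol_space * (bar_len - filled)
    percents = 100.0 * done / float(total) if total else 0.0
    line = f" [{bar}] {percents:>5.1f}% ...{status} "
    # Keep the line one column short of the width so the cursor returns cleanly.
    sys.stdout.write(line[:columns - 1] + "\r")
    sys.stdout.flush()
```

Calling something like draw_progress(4500, 10000, status="Extracting") repeatedly then redraws a single line even in a narrow terminal.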

View File

@@ -1,49 +0,0 @@
'''
This code for this progress bar is based on
vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License)
'''
import sys
class ProgressBar:
def __init__(self, total, status):
self._bar_len = 60
self._cancelled = False
print(''.join([' ' * 10, '|', '-' * (self._bar_len), '|']), end="")
self.reset(total=total, status=status)
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
self._symbol_done = symbol_done
self._symbol_space = symbol_space
self._total = total
self._status = status
self._old_len = 0
self._count = 0
print()
print(f'{status:<11}', end='')
def _disp(self, _, fetched):
self._progress(fetched, self._total)
def _progress(self, fillin, total):
if total == 0 or self._cancelled:
return
self._count += fillin
filled_len = int(round(self._bar_len * self._count / float(total)))
if filled_len > self._bar_len:
filled_len = self._bar_len
print((filled_len - self._old_len) * self._symbol_done, end="")
sys.stdout.flush()
self._old_len = filled_len
def close(self):
if not self._cancelled:
self._progress(self._total, self._total)
def cancel(self):
self._cancelled = True
def is_cancelled(self):
return self._cancelled

View File

@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
'''
Thrown when the video_id does not exist (VideoInfo).
'''
pass
def __init__(self, doc):
self.msg = "InvalidVideoIdException"
self.doc = doc
class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):
class RetryExceedMaxCount(Exception):
'''
thrown when the number of retries exceeds the maximum value.
Thrown when the number of retries exceeds the maximum value.
'''
pass
@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):
class VideoInfoParseError(Exception):
'''
thrown when failed to parse video info
Base exception when parsing video info.
'''
class PatternUnmatchError(VideoInfoParseError):
'''
thrown when failed to parse video info with unmatched pattern
Thrown when failed to parse video info with unmatched pattern.
'''
def __init__(self, doc):
self.msg = "PatternUnmatchError"
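
With this change both InvalidVideoIdException and PatternUnmatchError carry the offending document in a doc attribute plus a short msg, which is what lets the CLI persist the raw response when --save_error_data is set. A hedged sketch of how a caller can use that; save_raw is a hypothetical stand-in for pytchat's util.save, and the import path is assumed from the `from .. exceptions import ...` line earlier in this diff:

```python
# Assumed import path, based on the relative import shown in the CLI diff.
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError


def save_raw(doc, prefix, extension=".dat"):
    """Hypothetical stand-in for pytchat's util.save(): dump raw text to disk."""
    with open(prefix + extension, "w", encoding="utf-8") as f:
        f.write(doc if isinstance(doc, str) else str(doc))


def parse_or_dump(fetch, video_id, save_error_data=False):
    """Call a fetcher; on a parse failure, optionally persist the raw document."""
    try:
        return fetch(video_id)
    except (InvalidVideoIdException, PatternUnmatchError) as e:
        print(e.msg)
        if save_error_data:
            # e.doc holds the unparsed response that triggered the failure.
            save_raw(e.doc, "ERR_" + e.msg)
        return None
```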

View File

@@ -16,6 +16,9 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
# Set to avoid duplicate parameters
param_set = set()
def _split(start, end, count, min_interval_sec=120):
"""
@@ -64,6 +67,10 @@ def ready_blocks(video_id, duration, div, callback):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
if continuation in param_set:
next_continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=headers)
next_continuation, actions = parser.parse(resp.json())
break
@@ -112,6 +119,10 @@ def fetch_patch(callback, blocks, video_id):
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
if continuation in param_set:
continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=config.headers)
continuation, actions = parser.parse(resp.json())
break
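
The module-level param_set introduced here records every continuation parameter that has already been requested, so a repeated token ends the block instead of being fetched again. A simplified, synchronous sketch of the same guard, using placeholder fetch_json and parse helpers rather than the extractor's real async session and parser:

```python
seen_continuations = set()  # module-level guard, analogous to param_set above


def fetch_chunk(continuation, fetch_json, parse):
    """Fetch one chat chunk unless this continuation was already requested."""
    if continuation in seen_continuations:
        # Already requested once: tell the caller to stop instead of refetching.
        return None, []
    seen_continuations.add(continuation)
    raw = fetch_json(continuation)            # placeholder for the HTTP GET
    next_continuation, actions = parse(raw)   # placeholder for parser.parse()
    return next_continuation, actions
```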

View File

@@ -7,7 +7,6 @@ from typing import Tuple
class ExtractWorker:
"""
ExtractWorker associates a download session with a block.
When the worker finishes fetching, the block
being fetched is split and assigned to the free worker.