Compare commits

...

9 Commits

Author SHA1 Message Date
taizan-hokuto
480c9e15b8 Merge branch 'hotfix/continue_error' 2020-09-11 00:21:07 +09:00
taizan-hokuto
35aa7636f6 Increment version 2020-09-11 00:20:24 +09:00
taizan-hokuto
8fee67c2d4 Fix handling video info error 2020-09-11 00:18:09 +09:00
taizan-hokuto
d3f1643a40 Merge branch 'release/v0.2.1' 2020-09-09 22:23:01 +09:00
taizan-hokuto
eb29f27493 Increment version 2020-09-09 22:22:31 +09:00
taizan-hokuto
8adf75ab83 Merge branch 'feature/pbar' into develop 2020-09-09 22:20:36 +09:00
taizan-hokuto
2e05803d75 Remove unnecessary option 2020-09-09 22:20:09 +09:00
taizan-hokuto
f16c0ee73a Fix progress bar line feed and remove pbar option 2020-09-09 22:19:10 +09:00
taizan-hokuto
a338f2b782 Merge tag 'v0.2.0' into develop
v0.2.0
2020-09-07 23:35:45 +09:00
6 changed files with 41 additions and 89 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.0' __version__ = '0.2.2'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -1,11 +1,12 @@
import argparse import argparse
import os import os
import sys
import signal import signal
import time
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
from .arguments import Arguments from .arguments import Arguments
from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
from .. processors.html_archiver import HTMLArchiver from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor from .. tool.extract.extractor import Extractor
@@ -32,18 +33,12 @@ def main():
'If ID starts with a hyphen (-), enclose the ID in square brackets.') 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./') help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.PBAR}', action='store_true',
help='Display rich progress bar')
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true', parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
help='Save error data when error occurs(".dat" file)') help='Save error data when error occurs(".dat" file)')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version') help='Show version')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
if Arguments().pbar:
from .progressbar_rich import ProgressBar
else:
from .progressbar_simple import ProgressBar
if Arguments().print_version: if Arguments().print_version:
print(f'pytchat v{__version__} © 2019 taizan-hokuto') print(f'pytchat v{__version__} © 2019 taizan-hokuto')
return return
@@ -52,7 +47,7 @@ def main():
if not Arguments().video_ids: if not Arguments().video_ids:
parser.print_help() parser.print_help()
return return
for video_id in Arguments().video_ids: for counter, video_id in enumerate(Arguments().video_ids):
if '[' in video_id: if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '') video_id = video_id.replace('[', '').replace(']', '')
try: try:
@@ -61,8 +56,24 @@ def main():
path = Path(Arguments().output + video_id + '.html') path = Path(Arguments().output + video_id + '.html')
else: else:
raise FileNotFoundError raise FileNotFoundError
info = VideoInfo(video_id) err = None
print(f"Extracting...\n" for _ in range(3): # retry 3 times
try:
info = VideoInfo(video_id)
break
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
err = e
time.sleep(2)
continue
else:
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(err.doc, "ERR", ".dat")
continue
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
print(f"\n"
f" video_id: {video_id}\n" f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n" f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}") f" title: {info.get_title()}")
@@ -70,17 +81,14 @@ def main():
print(f" output path: {path.resolve()}") print(f" output path: {path.resolve()}")
duration = info.get_duration() duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting") pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id, ex = Extractor(video_id,
callback=pbar._disp, callback=pbar._disp,
div=10) div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar))) signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
data = ex.extract() data = ex.extract()
if data == []: if data == []:
return False return False
if Arguments().pbar: pbar.reset("#", "=", total=len(data), status="Rendering ")
pbar.reset("#", "=", total=len(data), status="Rendering ")
else:
pbar.reset("=", "", total=len(data), status="Rendering ")
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp) processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
processor.process( processor.process(
[{'video_id': None, [{'video_id': None,
@@ -88,19 +96,11 @@ def main():
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}] 'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
) )
processor.finalize() processor.finalize()
if Arguments().pbar: pbar.reset('#', '#', status='Completed ')
pbar.reset('#', '#', status='Completed ') pbar.close()
pbar.close()
else:
pbar.close()
print("\nCompleted")
print() print()
if pbar.is_cancelled(): if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n") print("\nThe extraction process has been discontinued.\n")
return False
return True
except InvalidVideoIdException: except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id) print("Invalid Video ID or URL:", video_id)
except NoContents as e: except NoContents as e:
@@ -109,14 +109,9 @@ def main():
print("The specified directory does not exist.:{}".format(Arguments().output)) print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e: except JSONDecodeError as e:
print(e.msg) print(e.msg)
print("Cannot parse video information.:{}".format(video_id)) print("JSONDecodeError.:{}".format(video_id))
if Arguments().save_error_data: if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat") util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return return

View File

@@ -19,7 +19,6 @@ class Arguments(metaclass=Singleton):
OUTPUT: str = 'output_dir' OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id' VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data' SAVE_ERROR_DATA: bool = 'save_error_data'
PBAR: bool ='pbar'
def __init__(self, def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None): arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -37,7 +36,7 @@ class Arguments(metaclass=Singleton):
self.output: str = arguments[Arguments.Name.OUTPUT] self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = [] self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA] self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
self.pbar: bool = arguments[Arguments.Name.PBAR]
# Videos # Videos
if arguments[Arguments.Name.VIDEO_IDS]: if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id self.video_ids = [video_id

View File

@@ -4,6 +4,7 @@ vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License) (MIT License)
''' '''
import shutil
import sys import sys
@@ -13,8 +14,9 @@ class ProgressBar:
self._cancelled = False self._cancelled = False
self.reset(total=total, status=status) self.reset(total=total, status=status)
self._blinker = 0 self._blinker = 0
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''): def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
self._symbol_done = symbol_done self._symbol_done = symbol_done
self._symbol_space = symbol_space self._symbol_space = symbol_space
self._total = total self._total = total
@@ -37,7 +39,9 @@ class ProgressBar:
bar = self._symbol_done * filled_len + \ bar = self._symbol_done * filled_len + \
self._symbol_space * (self._bar_len - filled_len) self._symbol_space * (self._bar_len - filled_len)
sys.stdout.write(' [%s] %s%s ...%s \r' % (bar, percents, '%', self._status)) disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
sys.stdout.write(disp)
sys.stdout.flush() sys.stdout.flush()
self._blinker += 1 self._blinker += 1

View File

@@ -1,49 +0,0 @@
'''
The code for this progress bar is based on
vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License)
'''
import sys
class ProgressBar:
    """Append-only text progress bar written to standard output.

    The bar never rewrites what it has already printed: a ruler line is
    emitted once on construction, and every progress update only appends
    the symbols needed to reach the new fill length.
    """

    def __init__(self, total, status):
        """Print the ruler line and start the first phase.

        Args:
            total: Amount of work that corresponds to a completely filled bar.
            status: Label printed in front of the bar.
        """
        self._bar_len = 60
        self._cancelled = False
        # The ruler is offset by 11 columns ('|' sits in the last of them) so
        # that its dashes line up with the bar printed after the status
        # label, which reset() pads to 11 columns.
        print(''.join([' ' * 10, '|', '-' * (self._bar_len), '|']), end="")
        self.reset(total=total, status=status)

    def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
        """Begin a new phase on a fresh output line.

        Args:
            symbol_done: String printed for each filled bar position.
            symbol_space: Stored but not used when drawing this bar.
            total: Amount of work that corresponds to a completely filled bar.
            status: Label printed, left-aligned in 11 columns, before the bar.
        """
        self._symbol_done = symbol_done
        self._symbol_space = symbol_space
        self._total = total
        self._status = status
        self._old_len = 0
        self._count = 0
        print()
        print(f'{status:<11}', end='')

    def _disp(self, _, fetched):
        """Progress callback: the first argument is ignored, `fetched` is the increment."""
        self._progress(fetched, self._total)

    def _progress(self, fillin, total):
        """Add `fillin` to the running count and append any newly filled positions.

        Does nothing when `total` is zero or the bar has been cancelled.
        """
        if total == 0 or self._cancelled:
            return
        self._count += fillin
        target_len = int(round(self._bar_len * self._count / float(total)))
        # Keep the fill length within [already printed, bar width]. Printed
        # symbols cannot be taken back, so letting the recorded length shrink
        # (e.g. on a negative increment) would make later updates print the
        # same positions again and overrun the ruler.
        filled_len = max(self._old_len, min(target_len, self._bar_len))
        print((filled_len - self._old_len) * self._symbol_done, end="")
        sys.stdout.flush()
        self._old_len = filled_len

    def close(self):
        """Fill the remainder of the bar, unless the bar was cancelled."""
        if not self._cancelled:
            self._progress(self._total, self._total)

    def cancel(self):
        """Mark the bar as cancelled; later progress updates print nothing."""
        self._cancelled = True

    def is_cancelled(self):
        """Return True if cancel() has been called."""
        return self._cancelled

View File

@@ -38,7 +38,10 @@ class InvalidVideoIdException(Exception):
''' '''
Thrown when the video_id does not exist (VideoInfo). Thrown when the video_id does not exist (VideoInfo).
''' '''
pass def __init__(self, doc):
self.msg = "InvalidVideoIdException"
self.doc = doc
class UnknownConnectionError(Exception): class UnknownConnectionError(Exception):
@@ -47,7 +50,7 @@ class UnknownConnectionError(Exception):
class RetryExceedMaxCount(Exception): class RetryExceedMaxCount(Exception):
''' '''
thrown when the number of retries exceeds the maximum value. Thrown when the number of retries exceeds the maximum value.
''' '''
pass pass
@@ -66,13 +69,13 @@ class FailedExtractContinuation(ChatDataFinished):
class VideoInfoParseError(Exception): class VideoInfoParseError(Exception):
''' '''
thrown when failed to parse video info Base exception when parsing video info.
''' '''
class PatternUnmatchError(VideoInfoParseError): class PatternUnmatchError(VideoInfoParseError):
''' '''
thrown when failed to parse video info with unmatched pattern Thrown when failed to parse video info with unmatched pattern.
''' '''
def __init__(self, doc): def __init__(self, doc):
self.msg = "PatternUnmatchError" self.msg = "PatternUnmatchError"