From 4eb18279fea3616ebb49d729baef327af85aae49 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Thu, 3 Sep 2020 00:57:26 +0900 Subject: [PATCH 1/3] Add progress bar --- pytchat/cli/__init__.py | 94 ++++++++++++-------- pytchat/cli/progressbar.py | 41 +++++++++ pytchat/exceptions.py | 11 ++- pytchat/tool/videoinfo.py | 4 +- setup.py | 72 --------------- tests/test_extract_duplcheck.py | 2 +- tests/test_videoinfo.py | 26 +++++- tests/testdata/videoinfo/collapsed_page.txt | 15 ++++ tests/testdata/videoinfo/pattern_unmatch.txt | 15 ++++ 9 files changed, 168 insertions(+), 112 deletions(-) create mode 100644 pytchat/cli/progressbar.py delete mode 100644 setup.py create mode 100644 tests/testdata/videoinfo/collapsed_page.txt create mode 100644 tests/testdata/videoinfo/pattern_unmatch.txt diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 0d09de2..75a56e3 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -1,12 +1,17 @@ import argparse + import os +import signal +from json.decoder import JSONDecodeError from pathlib import Path -from pytchat.util.extract_video_id import extract_video_id from .arguments import Arguments -from .. exceptions import InvalidVideoIdException, NoContents, VideoInfoParseException +from .progressbar import ProgressBar +from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError from .. processors.html_archiver import HTMLArchiver from .. tool.extract.extractor import Extractor from .. tool.videoinfo import VideoInfo +from .. util.extract_video_id import extract_video_id +from .. import util from .. 
import __version__ ''' @@ -35,40 +40,57 @@ def main(): return # Extractor - if Arguments().video_ids: - for video_id in Arguments().video_ids: - if '[' in video_id: - video_id = video_id.replace('[', '').replace(']', '') - try: - video_id = extract_video_id(video_id) - if os.path.exists(Arguments().output): - path = Path(Arguments().output + video_id + '.html') - else: - raise FileNotFoundError - info = VideoInfo(video_id) - print(f"Extracting...\n" - f" video_id: {video_id}\n" - f" channel: {info.get_channel_name()}\n" - f" title: {info.get_title()}") - - print(f" output path: {path.resolve()}") - Extractor(video_id, - processor=HTMLArchiver( - Arguments().output + video_id + '.html'), - callback=_disp_progress - ).extract() - print("\nExtraction end.\n") - except InvalidVideoIdException: - print("Invalid Video ID or URL:", video_id) - except (TypeError, NoContents) as e: - print(e) - except FileNotFoundError: - print("The specified directory does not exist.:{}".format(Arguments().output)) - except VideoInfoParseException: - print("Cannot parse video information.:{}".format(video_id)) + if not Arguments().video_ids: + parser.print_help() return - parser.print_help() + for video_id in Arguments().video_ids: + if '[' in video_id: + video_id = video_id.replace('[', '').replace(']', '') + try: + video_id = extract_video_id(video_id) + if os.path.exists(Arguments().output): + path = Path(Arguments().output + video_id + '.html') + else: + raise FileNotFoundError + info = VideoInfo(video_id) + print(f"Extracting...\n" + f" video_id: {video_id}\n" + f" channel: {info.get_channel_name()}\n" + f" title: {info.get_title()}") + + print(f" output path: {path.resolve()}") + duration = info.get_duration() + pbar = ProgressBar(duration) + ex = Extractor(video_id, + processor=HTMLArchiver(Arguments().output + video_id + '.html'), + callback=pbar._disp, + div=10) + signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar))) + ex.extract() + pbar.close() + if pbar.is_cancelled(): 
+ print("\nThe extraction process has been discontinued.\n") + return + print("\nThe extraction process has been completed.\n") + except InvalidVideoIdException: + print("Invalid Video ID or URL:", video_id) + except (TypeError, NoContents) as e: + + print(e) + except FileNotFoundError: + print("The specified directory does not exist.:{}".format(Arguments().output)) + except JSONDecodeError as e: + print(e.msg) + print("Cannot parse video information.:{}".format(video_id)) + util.save(e.doc, "ERR_JSON_DECODE", ".dat") + except PatternUnmatchError as e: + print(e.msg) + print("Cannot parse video information.:{}".format(video_id)) + util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat") + + return -def _disp_progress(a, b): - print('.', end="", flush=True) +def cancel(ex: Extractor, pbar: ProgressBar): + ex.cancel() + pbar.cancel() diff --git a/pytchat/cli/progressbar.py b/pytchat/cli/progressbar.py new file mode 100644 index 0000000..d49b825 --- /dev/null +++ b/pytchat/cli/progressbar.py @@ -0,0 +1,41 @@ +''' +This code for this progress bar is based on +vladignatyev/progress.py +https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 +(MIT License) +''' +import sys + + +class ProgressBar: + def __init__(self, duration): + self._duration = duration + self._count = 0 + self._bar_len = 60 + self._cancelled = False + + def _disp(self, _, fetched): + self._progress(fetched / 1000, self._duration) + + def _progress(self, fillin, total, status=''): + if not total or self._cancelled: # unknown (None) or zero duration: nothing to draw + return + self._count += fillin + filled_len = int(round(self._bar_len * self._count / float(total))) + percents = round(100.0 * self._count / float(total), 1) + if filled_len > self._bar_len: + filled_len = self._bar_len + percents = 100 + bar = '=' * filled_len + ' ' * (self._bar_len - filled_len) + sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status)) + sys.stdout.flush() + + def close(self): + if not self._cancelled: + self._progress(self._duration, 
self._duration) + + def cancel(self): + self._cancelled = True + + def is_cancelled(self): + return self._cancelled diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index 1c1793c..af6d4af 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -64,7 +64,16 @@ class FailedExtractContinuation(ChatDataFinished): pass -class VideoInfoParseException(Exception): +class VideoInfoParseError(Exception): ''' thrown when failed to parse video info ''' + + +class PatternUnmatchError(VideoInfoParseError): + ''' + thrown when failed to parse video info with unmatched pattern + ''' + def __init__(self, doc): + self.msg = "PatternUnmatchError" + self.doc = doc diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index d1047d7..e2de5f5 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -2,7 +2,7 @@ import json import re import httpx from .. import config -from ..exceptions import InvalidVideoIdException +from ..exceptions import InvalidVideoIdException, PatternUnmatchError from ..util.extract_video_id import extract_video_id headers = config.headers @@ -91,6 +91,8 @@ class VideoInfo: def _parse(self, text): result = re.search(pattern, text) + if result is None: + raise PatternUnmatchError(text) res = json.loads(result.group(1)[:-1]) response = self._get_item(res, item_response) if response is None: diff --git a/setup.py b/setup.py deleted file mode 100644 index d03a9b0..0000000 --- a/setup.py +++ /dev/null @@ -1,72 +0,0 @@ -from setuptools import setup, find_packages, Command -from os import path, system, remove, rename, removedirs -import re - -package_name = "pytchat" - -root_dir = path.abspath(path.dirname(__file__)) - -def _requirements(): - return [name.rstrip() - for name in open(path.join( - root_dir, 'requirements.txt')).readlines()] - -def _test_requirements(): - return [name.rstrip() - for name in open(path.join( - root_dir, 'requirements_test.txt')).readlines()] - -with open(path.join(root_dir, package_name, 
'__init__.py')) as f: - init_text = f.read() - version = re.search( - r'__version__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1) - license = re.search( - r'__license__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1) - author = re.search( - r'__author__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1) - author_email = re.search( - r'__author_email__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1) - url = re.search( - r'__url__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1) - -assert version -assert license -assert author -assert author_email -assert url - - - - -with open('README.md', encoding='utf-8') as f: - long_description = f.read() - - - -setup( - author=author, - author_email=author_email, - classifiers=[ - 'Natural Language :: Japanese', - 'Development Status :: 4 - Beta', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'License :: OSI Approved :: MIT License', - ], - description="a python library for fetching youtube live chat.", - entry_points= - ''' - [console_scripts] - pytchat=pytchat.cli:main - ''', - install_requires=_requirements(), - keywords='youtube livechat asyncio', - license=license, - long_description=long_description, - long_description_content_type='text/markdown', - name=package_name, - packages=find_packages(exclude=['*log.txt','*tests','*testrun']), - url=url, - version=version, -) \ No newline at end of file diff --git a/tests/test_extract_duplcheck.py b/tests/test_extract_duplcheck.py index c5fcffd..c432302 100644 --- a/tests/test_extract_duplcheck.py +++ b/tests/test_extract_duplcheck.py @@ -12,7 +12,7 @@ def _open_file(path): def test_overlap(): """ - test overlap data + test overlap data operation : [0] [2] [3] [4] -> last :align to end [1] , [5] -> no change diff --git a/tests/test_videoinfo.py b/tests/test_videoinfo.py index 71559ed..af1ba84 100644 --- a/tests/test_videoinfo.py +++ b/tests/test_videoinfo.py @@ -1,5 +1,7 @@ +from json.decoder import JSONDecodeError 
from pytchat.tool.videoinfo import VideoInfo -from pytchat.exceptions import InvalidVideoIdException +from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError +from pytchat import util def _open_file(path): @@ -64,3 +66,25 @@ def test_no_info(mocker): assert info.get_title() is None assert info.get_channel_id() is None assert info.get_duration() is None + + +def test_collapsed_data(mocker): + '''Test case the video page's info is collapsed.''' + _set_test_data( + 'tests/testdata/videoinfo/collapsed_page.txt', mocker) + try: + _ = VideoInfo('__test_id__') + assert False + except JSONDecodeError: + assert True + + +def test_pattern_unmatch(mocker): + '''Test case the pattern for extraction is unmatched.''' + _set_test_data( + 'tests/testdata/videoinfo/pattern_unmatch.txt', mocker) + try: + _ = VideoInfo('__test_id__') + assert False + except PatternUnmatchError: + assert True diff --git a/tests/testdata/videoinfo/collapsed_page.txt b/tests/testdata/videoinfo/collapsed_page.txt new file mode 100644 index 0000000..a60ec2a --- /dev/null +++ b/tests/testdata/videoinfo/collapsed_page.txt @@ -0,0 +1,15 @@ + +
+ + +