Merge branch 'feature/add_progressbar' into develop

2020-09-03 19:54:35 +09:00
parent e9ed564e1b 8a8cef399f
commit 9413c4a186
10 changed files with 174 additions and 115 deletions
--- a/pytchat/cli/init.py
+++ b/pytchat/cli/init.py
@@ -1,12 +1,17 @@
 import argparse
+
 import os
+import signal
+from json.decoder import JSONDecodeError
 from pathlib import Path
-from pytchat.util.extract_video_id import extract_video_id
 from .arguments import Arguments
-from .. exceptions import InvalidVideoIdException, NoContents, VideoInfoParseException
+from .progressbar import ProgressBar
+from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
 from .. processors.html_archiver import HTMLArchiver
 from .. tool.extract.extractor import Extractor
 from .. tool.videoinfo import VideoInfo
+from .. util.extract_video_id import extract_video_id
+from .. import util
 from .. import __version__

 '''
@@ -29,13 +34,17 @@ def main():
                        help='Output directory (end with "/"). default="./"', default='./')
    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
                        help='Show version')
+    parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
+                        help='Save error data when error occurs(".dat" file)')
    Arguments(parser.parse_args().__dict__)
    if Arguments().print_version:
        print(f'pytchat v{__version__}     © 2019 taizan-hokuto')
        return

    # Extractor
-    if Arguments().video_ids:
+    if not Arguments().video_ids:
+        parser.print_help()
+        return
    for video_id in Arguments().video_ids:
        if '[' in video_id:
            video_id = video_id.replace('[', '').replace(']', '')
@@ -52,23 +61,40 @@ def main():
                  f" title:    {info.get_title()}")

            print(f" output path: {path.resolve()}")
-                Extractor(video_id,
-                          processor=HTMLArchiver(
-                              Arguments().output + video_id + '.html'),
-                          callback=_disp_progress
-                          ).extract()
-                print("\nExtraction end.\n")
+            duration = info.get_duration()
+            pbar = ProgressBar(duration)
+            ex = Extractor(video_id,
+                    processor=HTMLArchiver(Arguments().output + video_id + '.html'),
+                    callback=pbar._disp,
+                    div=10)
+            signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
+            ex.extract()
+            pbar.close()
+            if pbar.is_cancelled():
+                print("\nThe extraction process has been discontinued.\n")
+                return
+            print("\nThe extraction process has been completed.\n")
        except InvalidVideoIdException:
            print("Invalid Video ID or URL:", video_id)
        except (TypeError, NoContents) as e:
-                print(e)
+
+            print(e.with_traceback())
        except FileNotFoundError:
            print("The specified directory does not exist.:{}".format(Arguments().output))
-            except VideoInfoParseException:
+        except JSONDecodeError as e:
+            print(e.msg)
            print("Cannot parse video information.:{}".format(video_id))
+            if Arguments().save_error_data:
+                util.save(e.doc, "ERR_JSON_DECODE", ".dat")
+        except PatternUnmatchError as e:
+            print(e.msg)
+            print("Cannot parse video information.:{}".format(video_id))
+            if Arguments().save_error_data:
+                util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
+
    return
-    parser.print_help()


-def _disp_progress(a, b):
-    print('.', end="", flush=True)
+def cancel(ex: Extractor, pbar: ProgressBar):  # invoked from the SIGINT handler installed in main()
+    ex.cancel()  # presumably asks the extractor to stop fetching; verify against Extractor
+    pbar.cancel()  # flag the bar as cancelled so it stops redrawing
--- a/pytchat/cli/arguments.py
+++ b/pytchat/cli/arguments.py
@@ -18,6 +18,7 @@ class Arguments(metaclass=Singleton):
        VERSION: str = 'version'
        OUTPUT: str = 'output_dir'
        VIDEO_IDS: str = 'video_id'
+        SAVE_ERROR_DATA: bool = 'save_error_data'

    def __init__(self,
                 arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -34,10 +35,8 @@ class Arguments(metaclass=Singleton):
        self.print_version: bool = arguments[Arguments.Name.VERSION]
        self.output: str = arguments[Arguments.Name.OUTPUT]
        self.video_ids: List[int] = []
+        self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
        # Videos
        if arguments[Arguments.Name.VIDEO_IDS]:
            self.video_ids = [video_id
                              for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
-
-
-
--- a/pytchat/cli/progressbar.py
+++ b/pytchat/cli/progressbar.py
@@ -0,0 +1,41 @@
+'''
+The code for this progress bar is based on
+vladignatyev/progress.py
+https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
+(MIT License)
+'''
+import sys
+
+
+class ProgressBar:
+    '''Single-line console progress bar that fills as progress approaches `duration`.'''
+    def __init__(self, duration):
+        self._duration = duration  # total to reach; None or 0 disables drawing
+        self._count = 0  # progress accumulated so far
+        self._bar_len = 60  # bar width in characters
+        self._cancelled = False
+
+    def _disp(self, _, fetched):  # progress callback; the first argument is ignored
+        self._progress(fetched / 1000, self._duration)
+
+    def _progress(self, fillin, total, status=''):
+        '''Add `fillin` to the running count and redraw the bar against `total`.'''
+        if not total or self._cancelled:  # also covers total=None (float(None) would raise)
+            return
+        self._count += fillin
+        ratio = min(self._count / float(total), 1.0)  # clamp: bar and percent stop at 100
+        filled_len = int(round(self._bar_len * ratio))
+        percents = round(100.0 * ratio, 1)
+        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
+        sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
+        sys.stdout.flush()
+
+    def close(self):  # draw the completed bar unless the run was cancelled
+        if not self._cancelled:
+            self._progress(self._duration, self._duration)
+
+    def cancel(self):  # stop all further drawing
+        self._cancelled = True
+
+    def is_cancelled(self):
+        return self._cancelled
--- a/pytchat/exceptions.py
+++ b/pytchat/exceptions.py
@@ -64,7 +64,16 @@ class FailedExtractContinuation(ChatDataFinished):
    pass


-class VideoInfoParseException(Exception):
+class VideoInfoParseError(Exception):
    '''
    thrown when failed to parse video info
    '''
+
+
+class PatternUnmatchError(VideoInfoParseError):
+    '''Raised when the extraction pattern finds no match in the video page text.'''
+    def __init__(self, doc):
+        # Pass only the short label up, so str(e) does not echo the whole document.
+        super().__init__("PatternUnmatchError")
+        self.msg = "PatternUnmatchError"  # short label printed by the CLI
+        self.doc = doc  # full unmatched text, kept for optional error dumps
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -2,7 +2,7 @@ import json
 import re
 import httpx
 from .. import config
-from ..exceptions import InvalidVideoIdException
+from ..exceptions import InvalidVideoIdException, PatternUnmatchError
 from ..util.extract_video_id import extract_video_id

 headers = config.headers
@@ -91,6 +91,8 @@ class VideoInfo:

    def _parse(self, text):
        result = re.search(pattern, text)
+        if result is None:
+            raise PatternUnmatchError(text)
        res = json.loads(result.group(1)[:-1])
        response = self._get_item(res, item_response)
        if response is None:
--- a/setup.py
+++ b/setup.py
@@ -1,72 +0,0 @@
-from setuptools import setup, find_packages, Command
-from os import path, system, remove, rename, removedirs
-import re
-
-package_name = "pytchat"
-
-root_dir = path.abspath(path.dirname(__file__))
-
-def _requirements():
-    return [name.rstrip() 
-        for name in open(path.join(
-            root_dir, 'requirements.txt')).readlines()]
-
-def _test_requirements():
-    return [name.rstrip() 
-        for name in open(path.join(
-            root_dir, 'requirements_test.txt')).readlines()]
-
-with open(path.join(root_dir, package_name, '__init__.py')) as f:
-    init_text = f.read()
-    version = re.search(
-        r'__version__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
-    license = re.search(
-        r'__license__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
-    author = re.search(
-        r'__author__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
-    author_email = re.search(
-        r'__author_email__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
-    url = re.search(
-        r'__url__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
-
-assert version
-assert license
-assert author
-assert author_email
-assert url
-
-
-
-
-with open('README.md', encoding='utf-8') as f:
-    long_description = f.read()
-
-
-
-setup(
-    author=author,
-    author_email=author_email,
-    classifiers=[
-        'Natural Language :: Japanese',
-        'Development Status :: 4 - Beta',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 3.7',        
-        'Programming Language :: Python :: 3.8',        
-        'License :: OSI Approved :: MIT License',
-    ],
-    description="a python library for fetching youtube live chat.",
-    entry_points=
-    '''
-        [console_scripts]
-        pytchat=pytchat.cli:main
-    ''',
-    install_requires=_requirements(),
-    keywords='youtube livechat asyncio',
-    license=license,
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    name=package_name,
-    packages=find_packages(exclude=['*log.txt','*tests','*testrun']),
-    url=url,
-    version=version,
-)
--- a/tests/test_videoinfo.py
+++ b/tests/test_videoinfo.py
@@ -1,5 +1,7 @@
+from json.decoder import JSONDecodeError
 from pytchat.tool.videoinfo import VideoInfo
-from pytchat.exceptions import InvalidVideoIdException
+from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
+from pytchat import util


 def _open_file(path):
@@ -64,3 +66,25 @@ def test_no_info(mocker):
    assert info.get_title() is None
    assert info.get_channel_id() is None
    assert info.get_duration() is None
+
+
+def test_collapsed_data(mocker):
+    '''VideoInfo must raise JSONDecodeError when the video page's info is collapsed.'''
+    _set_test_data(
+        'tests/testdata/videoinfo/collapsed_page.txt', mocker)
+    try:
+        _ = VideoInfo('__test_id__')
+        assert False  # reaching this line means no exception was raised
+    except JSONDecodeError:
+        assert True
+
+
+def test_pattern_unmatch(mocker):
+    '''VideoInfo must raise PatternUnmatchError when the extraction pattern is unmatched.'''
+    _set_test_data(
+        'tests/testdata/videoinfo/pattern_unmatch.txt', mocker)
+    try:
+        _ = VideoInfo('__test_id__')
+        assert False  # reaching this line means no exception was raised
+    except PatternUnmatchError:
+        assert True
--- a/tests/testdata/videoinfo/collapsed_page.txt
+++ b/tests/testdata/videoinfo/collapsed_page.txt
--- a/tests/testdata/videoinfo/pattern_unmatch.txt
+++ b/tests/testdata/videoinfo/pattern_unmatch.txt