Merge branch 'feature/add_progressbar' into develop

This commit is contained in:
taizan-hokuto
2020-09-03 19:54:35 +09:00
10 changed files with 174 additions and 115 deletions

View File

@@ -1,12 +1,17 @@
import argparse
import os
import signal
from json.decoder import JSONDecodeError
from pathlib import Path
from pytchat.util.extract_video_id import extract_video_id
from .arguments import Arguments
from .. exceptions import InvalidVideoIdException, NoContents, VideoInfoParseException
from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
from .. import util
from .. import __version__
'''
@@ -29,13 +34,17 @@ def main():
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version')
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
help='Save error data when error occurs(".dat" file)')
Arguments(parser.parse_args().__dict__)
if Arguments().print_version:
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
return
# Extractor
if Arguments().video_ids:
if not Arguments().video_ids:
parser.print_help()
return
for video_id in Arguments().video_ids:
if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '')
@@ -52,23 +61,40 @@ def main():
f" title: {info.get_title()}")
print(f" output path: {path.resolve()}")
Extractor(video_id,
processor=HTMLArchiver(
Arguments().output + video_id + '.html'),
callback=_disp_progress
).extract()
print("\nExtraction end.\n")
duration = info.get_duration()
pbar = ProgressBar(duration)
ex = Extractor(video_id,
processor=HTMLArchiver(Arguments().output + video_id + '.html'),
callback=pbar._disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
ex.extract()
pbar.close()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
return
print("\nThe extraction process has been completed.\n")
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except (TypeError, NoContents) as e:
print(e)
print(e.with_traceback())
except FileNotFoundError:
print("The specified directory does not exist.:{}".format(Arguments().output))
except VideoInfoParseException:
except JSONDecodeError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return
parser.print_help()
def _disp_progress(a, b):
print('.', end="", flush=True)
def cancel(ex: Extractor, pbar: ProgressBar):
    '''
    Cancel a running extraction together with its progress bar.

    Calls `cancel()` on the extractor first and on the progress bar second.
    '''
    for target in (ex, pbar):
        target.cancel()

View File

@@ -18,6 +18,7 @@ class Arguments(metaclass=Singleton):
VERSION: str = 'version'
OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data'
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -34,10 +35,8 @@ class Arguments(metaclass=Singleton):
self.print_version: bool = arguments[Arguments.Name.VERSION]
self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
# Videos
if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]

View File

@@ -0,0 +1,41 @@
'''
This code for this progress bar is based on
vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License)
'''
import sys
class ProgressBar:
    '''
    Single-line text progress bar written to stdout.

    Progress is accumulated in `_count` and rendered relative to the
    `duration` given at construction. Each redraw ends with a carriage
    return so the next one overwrites the same line.

    Parameter
    ---------
    duration :
        Total amount that corresponds to 100%. When it is falsy
        (`None` or 0) nothing is accumulated or drawn.
    '''

    def __init__(self, duration):
        self._duration = duration
        self._count = 0
        self._bar_len = 60
        self._cancelled = False

    def _disp(self, _, fetched):
        '''
        Progress callback: advance the bar by `fetched / 1000`.

        The first argument is ignored.
        NOTE(review): the division by 1000 presumably converts
        milliseconds to the unit of `duration` -- confirm against the
        caller of this callback.
        '''
        self._progress(fetched / 1000, self._duration)

    def _progress(self, fillin, total, status=''):
        '''
        Add `fillin` to the accumulated count and redraw the bar.

        Does nothing when `total` is falsy or the bar has been cancelled.
        '''
        # `not total` also covers None; the previous `total == 0` check let
        # None through and crashed in float(None).
        if not total or self._cancelled:
            return
        self._count += fillin
        sys.stdout.write(self._format(total, status))
        sys.stdout.flush()

    def _format(self, total, status=''):
        '''
        Return the text for the current state of the bar.

        `total` must be non-zero. Once the accumulated count exceeds
        `total`, the bar is shown full and the percentage as 100.
        '''
        if self._count > total:
            # Clamp both values together so the percentage can never be
            # displayed above 100 while the bar is merely full.
            filled_len = self._bar_len
            percents = 100
        else:
            filled_len = int(
                round(self._bar_len * self._count / float(total)))
            percents = round(100.0 * self._count / float(total), 1)
        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
        return ' [%s] %s%s ...%s\r' % (bar, percents, '%', status)

    def close(self):
        '''Draw the bar as complete unless it has been cancelled.'''
        if not self._cancelled:
            self._progress(self._duration, self._duration)

    def cancel(self):
        '''Mark the bar as cancelled; later updates are ignored.'''
        self._cancelled = True

    def is_cancelled(self):
        '''Return True if `cancel()` has been called.'''
        return self._cancelled

View File

@@ -64,7 +64,16 @@ class FailedExtractContinuation(ChatDataFinished):
pass
class VideoInfoParseError(Exception):
    '''
    thrown when failed to parse video info
    '''


# Backward-compatible alias: keeps the previous name of this exception
# importable and catchable.
VideoInfoParseException = VideoInfoParseError


class PatternUnmatchError(VideoInfoParseError):
    '''
    thrown when failed to parse video info with unmatched pattern

    Attributes
    ----------
    msg : str
        Fixed short description, "PatternUnmatchError".
    doc :
        The value passed to the constructor, kept so that callers can
        inspect or save it.
    '''

    def __init__(self, doc):
        self.msg = "PatternUnmatchError"
        self.doc = doc
        # Initialise the base exception so that str(e) and e.args carry
        # the message instead of being empty.
        super().__init__(self.msg)

View File

@@ -2,7 +2,7 @@ import json
import re
import httpx
from .. import config
from ..exceptions import InvalidVideoIdException
from ..exceptions import InvalidVideoIdException, PatternUnmatchError
from ..util.extract_video_id import extract_video_id
headers = config.headers
@@ -91,6 +91,8 @@ class VideoInfo:
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError(text)
res = json.loads(result.group(1)[:-1])
response = self._get_item(res, item_response)
if response is None:

View File

@@ -1,72 +0,0 @@
from setuptools import setup, find_packages, Command
from os import path, system, remove, rename, removedirs
import re
package_name = "pytchat"
root_dir = path.abspath(path.dirname(__file__))
def _requirements():
    '''
    Return the entries of `requirements.txt` located in `root_dir`.

    Each line is returned with trailing whitespace removed; blank lines
    are skipped.
    '''
    # `with` closes the file handle, which the bare open() call leaked.
    with open(path.join(root_dir, 'requirements.txt')) as req_file:
        return [line.rstrip() for line in req_file if line.strip()]
def _test_requirements():
    '''
    Return the entries of `requirements_test.txt` located in `root_dir`.

    Each line is returned with trailing whitespace removed; blank lines
    are skipped.
    '''
    # `with` closes the file handle, which the bare open() call leaked.
    with open(path.join(root_dir, 'requirements_test.txt')) as req_file:
        return [line.rstrip() for line in req_file if line.strip()]
def _read_metadata(name, text):
    '''
    Return the quoted value assigned to `__<name>__` in `text`.

    Raises RuntimeError when no such assignment is found, so a missing
    field is reported by name instead of as an AttributeError on None.
    '''
    match = re.search(
        r'__{}__\s*=\s*[\'\"](.+?)[\'\"]'.format(name), text)
    if match is None:
        raise RuntimeError(
            '__{}__ is not defined in {}/__init__.py'.format(
                name, package_name))
    return match.group(1)


with open(path.join(root_dir, package_name, '__init__.py')) as f:
    init_text = f.read()

# Package metadata is read from the dunder assignments in __init__.py.
# The pattern requires at least one character, so a successful match is
# never empty and no separate assert is needed.
version = _read_metadata('version', init_text)
license = _read_metadata('license', init_text)
author = _read_metadata('author', init_text)
author_email = _read_metadata('author_email', init_text)
url = _read_metadata('url', init_text)

# Resolve README.md against root_dir, like the other files read here,
# so the script does not depend on the current working directory.
with open(path.join(root_dir, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    author=author,
    author_email=author_email,
    classifiers=[
        'Natural Language :: Japanese',
        'Development Status :: 4 - Beta',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'License :: OSI Approved :: MIT License',
    ],
    description="a python library for fetching youtube live chat.",
    entry_points=(
        '\n'
        '[console_scripts]\n'
        'pytchat=pytchat.cli:main\n'
    ),
    install_requires=_requirements(),
    keywords='youtube livechat asyncio',
    license=license,
    long_description=long_description,
    long_description_content_type='text/markdown',
    name=package_name,
    packages=find_packages(exclude=['*log.txt', '*tests', '*testrun']),
    url=url,
    version=version,
)

View File

@@ -1,5 +1,7 @@
from json.decoder import JSONDecodeError
from pytchat.tool.videoinfo import VideoInfo
from pytchat.exceptions import InvalidVideoIdException
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
from pytchat import util
def _open_file(path):
@@ -64,3 +66,25 @@ def test_no_info(mocker):
assert info.get_title() is None
assert info.get_channel_id() is None
assert info.get_duration() is None
def test_collapsed_data(mocker):
    '''Test the case where the video page's info is collapsed.'''
    _set_test_data(
        'tests/testdata/videoinfo/collapsed_page.txt', mocker)
    try:
        _ = VideoInfo('__test_id__')
    except JSONDecodeError:
        return
    # An explicit raise (rather than `assert False`) still fails the test
    # when assertions are disabled, and says what went wrong.
    raise AssertionError(
        'VideoInfo() did not raise JSONDecodeError for collapsed data')
def test_pattern_unmatch(mocker):
    '''Test the case where the extraction pattern does not match.'''
    _set_test_data(
        'tests/testdata/videoinfo/pattern_unmatch.txt', mocker)
    try:
        _ = VideoInfo('__test_id__')
    except PatternUnmatchError:
        return
    # An explicit raise (rather than `assert False`) still fails the test
    # when assertions are disabled, and says what went wrong.
    raise AssertionError(
        'VideoInfo() did not raise PatternUnmatchError for unmatched data')

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long