Merge branch 'feature/add_progressbar' into develop
This commit is contained in:
@@ -1,12 +1,17 @@
|
||||
import argparse
|
||||
|
||||
import os
|
||||
import signal
|
||||
from json.decoder import JSONDecodeError
|
||||
from pathlib import Path
|
||||
from pytchat.util.extract_video_id import extract_video_id
|
||||
from .arguments import Arguments
|
||||
from .. exceptions import InvalidVideoIdException, NoContents, VideoInfoParseException
|
||||
from .progressbar import ProgressBar
|
||||
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
|
||||
from .. processors.html_archiver import HTMLArchiver
|
||||
from .. tool.extract.extractor import Extractor
|
||||
from .. tool.videoinfo import VideoInfo
|
||||
from .. util.extract_video_id import extract_video_id
|
||||
from .. import util
|
||||
from .. import __version__
|
||||
|
||||
'''
|
||||
@@ -29,13 +34,17 @@ def main():
|
||||
help='Output directory (end with "/"). default="./"', default='./')
|
||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||
help='Show version')
|
||||
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
|
||||
help='Save error data when error occurs(".dat" file)')
|
||||
Arguments(parser.parse_args().__dict__)
|
||||
if Arguments().print_version:
|
||||
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
||||
return
|
||||
|
||||
# Extractor
|
||||
if Arguments().video_ids:
|
||||
if not Arguments().video_ids:
|
||||
parser.print_help()
|
||||
return
|
||||
for video_id in Arguments().video_ids:
|
||||
if '[' in video_id:
|
||||
video_id = video_id.replace('[', '').replace(']', '')
|
||||
@@ -52,23 +61,40 @@ def main():
|
||||
f" title: {info.get_title()}")
|
||||
|
||||
print(f" output path: {path.resolve()}")
|
||||
Extractor(video_id,
|
||||
processor=HTMLArchiver(
|
||||
Arguments().output + video_id + '.html'),
|
||||
callback=_disp_progress
|
||||
).extract()
|
||||
print("\nExtraction end.\n")
|
||||
duration = info.get_duration()
|
||||
pbar = ProgressBar(duration)
|
||||
ex = Extractor(video_id,
|
||||
processor=HTMLArchiver(Arguments().output + video_id + '.html'),
|
||||
callback=pbar._disp,
|
||||
div=10)
|
||||
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
||||
ex.extract()
|
||||
pbar.close()
|
||||
if pbar.is_cancelled():
|
||||
print("\nThe extraction process has been discontinued.\n")
|
||||
return
|
||||
print("\nThe extraction process has been completed.\n")
|
||||
except InvalidVideoIdException:
|
||||
print("Invalid Video ID or URL:", video_id)
|
||||
except (TypeError, NoContents) as e:
|
||||
print(e)
|
||||
|
||||
print(e.with_traceback())
|
||||
except FileNotFoundError:
|
||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
||||
except VideoInfoParseException:
|
||||
except JSONDecodeError as e:
|
||||
print(e.msg)
|
||||
print("Cannot parse video information.:{}".format(video_id))
|
||||
if Arguments().save_error_data:
|
||||
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
||||
except PatternUnmatchError as e:
|
||||
print(e.msg)
|
||||
print("Cannot parse video information.:{}".format(video_id))
|
||||
if Arguments().save_error_data:
|
||||
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
|
||||
|
||||
return
|
||||
parser.print_help()
|
||||
|
||||
|
||||
def _disp_progress(a, b):
    '''Progress callback: emit one dot on stdout and flush immediately.

    Both arguments are accepted only to satisfy the callback signature
    and are ignored.
    '''
    print('.', end='', flush=True)
|
||||
def cancel(ex: Extractor, pbar: ProgressBar):
    '''Cancel the extractor first, then mark the progress bar as cancelled.'''
    for target in (ex, pbar):
        target.cancel()
|
||||
|
||||
@@ -18,6 +18,7 @@ class Arguments(metaclass=Singleton):
|
||||
VERSION: str = 'version'
|
||||
OUTPUT: str = 'output_dir'
|
||||
VIDEO_IDS: str = 'video_id'
|
||||
SAVE_ERROR_DATA: bool = 'save_error_data'
|
||||
|
||||
def __init__(self,
|
||||
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||
@@ -34,10 +35,8 @@ class Arguments(metaclass=Singleton):
|
||||
self.print_version: bool = arguments[Arguments.Name.VERSION]
|
||||
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||
self.video_ids: List[int] = []
|
||||
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
|
||||
# Videos
|
||||
if arguments[Arguments.Name.VIDEO_IDS]:
|
||||
self.video_ids = [video_id
|
||||
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
|
||||
|
||||
|
||||
|
||||
|
||||
41
pytchat/cli/progressbar.py
Normal file
41
pytchat/cli/progressbar.py
Normal file
@@ -0,0 +1,41 @@
|
||||
'''
|
||||
The code for this progress bar is based on
|
||||
vladignatyev/progress.py
|
||||
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
||||
(MIT License)
|
||||
'''
|
||||
import sys
|
||||
|
||||
|
||||
class ProgressBar:
    '''Single-line text progress bar written to standard output.

    Progress is accumulated from the values handed to the `_disp`
    callback and rendered relative to `duration`. Each redraw ends with
    a carriage return so the next one overwrites the same line.
    '''

    def __init__(self, duration):
        '''
        Parameter
        ---------
        duration :
            Total amount that corresponds to a completely filled bar.
            If it is falsy (`None` or `0`), nothing is ever drawn.
        '''
        self._duration = duration
        self._count = 0
        self._bar_len = 60
        self._cancelled = False

    def _disp(self, _, fetched):
        '''Callback: add `fetched / 1000` to the progress and redraw.

        The first argument is ignored.
        NOTE(review): the division suggests `fetched` is in milliseconds
        and `duration` in seconds -- confirm against the caller.
        '''
        self._progress(fetched / 1000, self._duration)

    def _progress(self, fillin, total, status=''):
        '''Advance the bar by `fillin` and redraw it against `total`.

        Does nothing when `total` is falsy (unknown or zero length) or
        after the bar has been cancelled.
        '''
        # `not total` also covers None; `float(None)` would raise TypeError.
        if not total or self._cancelled:
            return
        self._count += fillin
        ratio = self._count / float(total)
        if ratio > 1:
            # Clamp bar and percentage together so a small overshoot can
            # never render a full bar labelled e.g. "100.3%".
            ratio = 1.0
            percents = 100
        else:
            percents = round(100.0 * ratio, 1)
        filled_len = int(round(self._bar_len * ratio))
        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
        sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
        sys.stdout.flush()

    def close(self):
        '''Draw the bar as complete, unless it has been cancelled.'''
        if not self._cancelled:
            # Adding the full duration guarantees the bar reaches 100%.
            self._progress(self._duration, self._duration)

    def cancel(self):
        '''Mark the bar as cancelled; later updates are ignored.'''
        self._cancelled = True

    def is_cancelled(self):
        '''Return True if `cancel()` has been called.'''
        return self._cancelled
|
||||
@@ -64,7 +64,16 @@ class FailedExtractContinuation(ChatDataFinished):
|
||||
pass
|
||||
|
||||
|
||||
class VideoInfoParseException(Exception):
|
||||
class VideoInfoParseError(Exception):
    '''
    Raised when the video info cannot be parsed.
    '''
|
||||
|
||||
|
||||
class PatternUnmatchError(VideoInfoParseError):
    '''
    Raised when the pattern used to extract the video info matches nothing.

    Attributes
    ----------
    msg : str
        Fixed short message, "PatternUnmatchError".
    doc :
        The object given to the constructor (presumably the text that
        was searched -- confirm against the raising code).
    '''

    def __init__(self, doc):
        self.msg = "PatternUnmatchError"
        self.doc = doc

    def __str__(self):
        # BaseException keeps the constructor arguments in `args`, so the
        # default str() would print the whole `doc` in tracebacks and logs.
        # Show the short message instead; `doc` stays available as an
        # attribute and `args` is left untouched.
        return self.msg
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
import re
|
||||
import httpx
|
||||
from .. import config
|
||||
from ..exceptions import InvalidVideoIdException
|
||||
from ..exceptions import InvalidVideoIdException, PatternUnmatchError
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
@@ -91,6 +91,8 @@ class VideoInfo:
|
||||
|
||||
def _parse(self, text):
|
||||
result = re.search(pattern, text)
|
||||
if result is None:
|
||||
raise PatternUnmatchError(text)
|
||||
res = json.loads(result.group(1)[:-1])
|
||||
response = self._get_item(res, item_response)
|
||||
if response is None:
|
||||
|
||||
72
setup.py
72
setup.py
@@ -1,72 +0,0 @@
|
||||
from setuptools import setup, find_packages, Command
|
||||
from os import path, system, remove, rename, removedirs
|
||||
import re
|
||||
|
||||
package_name = "pytchat"
|
||||
|
||||
root_dir = path.abspath(path.dirname(__file__))
|
||||
|
||||
def _requirements():
    '''Return the lines of `requirements.txt` in `root_dir`, with trailing
    whitespace stripped from each line.'''
    # The context manager closes the file; the original left the handle open.
    with open(path.join(root_dir, 'requirements.txt')) as req_file:
        return [name.rstrip() for name in req_file]
|
||||
|
||||
def _test_requirements():
    '''Return the lines of `requirements_test.txt` in `root_dir`, with
    trailing whitespace stripped from each line.'''
    # The context manager closes the file; the original left the handle open.
    with open(path.join(root_dir, 'requirements_test.txt')) as req_file:
        return [name.rstrip() for name in req_file]
|
||||
|
||||
def _read_meta(field, text):
    '''Return the quoted value assigned to `__<field>__` in `text`.

    Raises RuntimeError when no such assignment is found, instead of the
    AttributeError that calling `.group(1)` on a failed search produces.
    '''
    found = re.search(
        r'__%s__\s*=\s*[\'\"](.+?)[\'\"]' % field, text)
    if found is None:
        raise RuntimeError(
            '__%s__ is not defined in %s/__init__.py' % (field, package_name))
    return found.group(1)


# Package metadata is kept in the package's __init__.py and read from there.
with open(path.join(root_dir, package_name, '__init__.py')) as f:
    init_text = f.read()

# `_read_meta` raises on a missing field and the pattern cannot match an
# empty value, so the former `assert` checks (stripped under -O) are
# no longer needed.
version = _read_meta('version', init_text)
license = _read_meta('license', init_text)
author = _read_meta('author', init_text)
author_email = _read_meta('author_email', init_text)
url = _read_meta('url', init_text)
|
||||
|
||||
|
||||
|
||||
|
||||
# The README doubles as the long description (rendered as markdown).
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setup(
    # Identity
    name=package_name,
    version=version,
    description="a python library for fetching youtube live chat.",
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords='youtube livechat asyncio',
    # Ownership
    author=author,
    author_email=author_email,
    url=url,
    license=license,
    classifiers=[
        'Natural Language :: Japanese',
        'Development Status :: 4 - Beta',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'License :: OSI Approved :: MIT License',
    ],
    # Contents and dependencies
    packages=find_packages(exclude=['*log.txt', '*tests', '*testrun']),
    install_requires=_requirements(),
    # Installs the `pytchat` command, which runs pytchat.cli:main.
    entry_points='''
        [console_scripts]
        pytchat=pytchat.cli:main
    ''',
)
|
||||
@@ -1,5 +1,7 @@
|
||||
from json.decoder import JSONDecodeError
|
||||
from pytchat.tool.videoinfo import VideoInfo
|
||||
from pytchat.exceptions import InvalidVideoIdException
|
||||
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
|
||||
from pytchat import util
|
||||
|
||||
|
||||
def _open_file(path):
|
||||
@@ -64,3 +66,25 @@ def test_no_info(mocker):
|
||||
assert info.get_title() is None
|
||||
assert info.get_channel_id() is None
|
||||
assert info.get_duration() is None
|
||||
|
||||
|
||||
def test_collapsed_data(mocker):
    '''VideoInfo must raise JSONDecodeError for the collapsed-page fixture.'''
    _set_test_data(
        'tests/testdata/videoinfo/collapsed_page.txt', mocker)
    try:
        VideoInfo('__test_id__')
    except JSONDecodeError:
        # Expected outcome: the test passes.
        pass
    else:
        # Construction succeeded although it should have failed.
        assert False
|
||||
|
||||
|
||||
def test_pattern_unmatch(mocker):
    '''VideoInfo must raise PatternUnmatchError for the unmatched-pattern fixture.'''
    _set_test_data(
        'tests/testdata/videoinfo/pattern_unmatch.txt', mocker)
    try:
        VideoInfo('__test_id__')
    except PatternUnmatchError:
        # Expected outcome: the test passes.
        pass
    else:
        # Construction succeeded although it should have failed.
        assert False
|
||||
|
||||
15
tests/testdata/videoinfo/collapsed_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/collapsed_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
15
tests/testdata/videoinfo/pattern_unmatch.txt
vendored
Normal file
15
tests/testdata/videoinfo/pattern_unmatch.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user