From 0abf8dd9f0bae6ff3b0175265301672b583e27e1 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Fri, 24 Jul 2020 14:03:07 +0900 Subject: [PATCH] Make it possible to extract video id from url --- pytchat/cli/__init__.py | 18 ++++++++++++------ pytchat/cli/arguments.py | 11 +++++++---- pytchat/core_async/livechat.py | 3 ++- pytchat/core_multithread/livechat.py | 3 ++- pytchat/processors/html_archiver.py | 2 +- pytchat/tool/extract/extractor.py | 3 ++- pytchat/tool/videoinfo.py | 5 +++-- pytchat/util/extract_video_id.py | 25 +++++++++++++++++++++++++ 8 files changed, 54 insertions(+), 16 deletions(-) create mode 100644 pytchat/util/extract_video_id.py diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 696af45..62237ff 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -1,5 +1,6 @@ import argparse from pathlib import Path +from pytchat.util.extract_video_id import extract_video_id from .arguments import Arguments from .. exceptions import InvalidVideoIdException, NoContents from .. processors.html_archiver import HTMLArchiver @@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader def main(): # Arguments parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') - parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, - help='Video IDs separated by commas without space.\n' + # parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?', + # help='Video ID, or URL that includes id.\n' + # 'If ID starts with a hyphen (-), enclose the ID in square brackets.') + parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str, + help='Video ID (or URL that includes Video ID). 
You can specify multiple video IDs by separating them with commas without spaces.\n' 'If ID starts with a hyphen (-), enclose the ID in square brackets.') parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, help='Output directory (end with "/"). default="./"', default='./') parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', - help='Settings version') + help='Show version') Arguments(parser.parse_args().__dict__) if Arguments().print_version: - print(f'pytchat v{__version__}') + print(f'pytchat v{__version__} © 2019 taizan-hokuto') return # Extractor @@ -43,14 +47,16 @@ def main(): f" channel: {info.get_channel_name()}\n" f" title: {info.get_title()}") path = Path(Arguments().output + video_id + '.html') - print(f"output path: {path.resolve()}") + print(f" output path: {path.resolve()}") Extractor(video_id, processor=HTMLArchiver( Arguments().output + video_id + '.html'), callback=_disp_progress ).extract() print("\nExtraction end.\n") - except (InvalidVideoIdException, NoContents) as e: + except InvalidVideoIdException: + print("Invalid Video ID or URL:", video_id) + except (TypeError, NoContents) as e: print(e) return parser.print_help() diff --git a/pytchat/cli/arguments.py b/pytchat/cli/arguments.py index d6fea2b..6f62548 100644 --- a/pytchat/cli/arguments.py +++ b/pytchat/cli/arguments.py @@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton): class Name: VERSION: str = 'version' - OUTPUT: str = 'output' - VIDEO: str = 'video' + OUTPUT: str = 'output_dir' + VIDEO_IDS: str = 'video_id' def __init__(self, arguments: Optional[Dict[str, Union[str, bool, int]]] = None): @@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton): self.output: str = arguments[Arguments.Name.OUTPUT] self.video_ids: List[int] = [] # Videos - if arguments[Arguments.Name.VIDEO]: + if arguments[Arguments.Name.VIDEO_IDS]: self.video_ids = [video_id - for video_id in arguments[Arguments.Name.VIDEO].split(',')] + for video_id in 
arguments[Arguments.Name.VIDEO_IDS].split(',')] + + + diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 2cc3ff8..17f91f2 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -15,6 +15,7 @@ from .. import exceptions from ..paramgen import liveparam, arcparam from ..processors.default.processor import DefaultProcessor from ..processors.combinator import Combinator +from ..util.extract_video_id import extract_video_id headers = config.headers MAX_RETRY = 10 @@ -86,7 +87,7 @@ class LiveChatAsync: topchat_only=False, logger=config.logger(__name__), ): - self._video_id = video_id + self._video_id = extract_video_id(video_id) self.seektime = seektime if isinstance(processor, tuple): self.processor = Combinator(processor) diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index 7f99c55..f439026 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -14,6 +14,7 @@ from .. import exceptions from ..paramgen import liveparam, arcparam from ..processors.default.processor import DefaultProcessor from ..processors.combinator import Combinator +from ..util.extract_video_id import extract_video_id headers = config.headers MAX_RETRY = 10 @@ -84,7 +85,7 @@ class LiveChat: topchat_only=False, logger=config.logger(__name__) ): - self._video_id = video_id + self._video_id = extract_video_id(video_id) self.seektime = seektime if isinstance(processor, tuple): self.processor = Combinator(processor) diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index dedab39..3676770 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor): super().__init__() self.save_path = self._checkpath(save_path) self.processor = DefaultProcessor() - self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary. 
+ self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary. self.header = [HEADER_HTML] self.body = ['\n', '\n', self._parse_table_header(fmt_headers)] diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py index 2b421af..56bd8aa 100644 --- a/pytchat/tool/extract/extractor.py +++ b/pytchat/tool/extract/extractor.py @@ -3,6 +3,7 @@ from . import duplcheck from .. videoinfo import VideoInfo from ... import config from ... exceptions import InvalidVideoIdException +from ... util.extract_video_id import extract_video_id logger = config.logger(__name__) headers = config.headers @@ -14,7 +15,7 @@ class Extractor: raise ValueError('div must be positive integer.') elif div > 10: div = 10 - self.video_id = video_id + self.video_id = extract_video_id(video_id) self.div = div self.callback = callback self.processor = processor diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index 13712dc..a87e0c4 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -3,6 +3,7 @@ import re import requests from .. import config from ..exceptions import InvalidVideoIdException +from ..util.extract_video_id import extract_video_id headers = config.headers @@ -78,8 +79,8 @@ class VideoInfo: ''' def __init__(self, video_id): - self.video_id = video_id - text = self._get_page_text(video_id) + self.video_id = extract_video_id(video_id) + text = self._get_page_text(self.video_id) self._parse(text) def _get_page_text(self, video_id): diff --git a/pytchat/util/extract_video_id.py b/pytchat/util/extract_video_id.py new file mode 100644 index 0000000..75385f8 --- /dev/null +++ b/pytchat/util/extract_video_id.py @@ -0,0 +1,25 @@ +import re +from .. 
exceptions import InvalidVideoIdException
+
+
+PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
+YT_VIDEO_ID_LENGTH = 11
+
+
+def extract_video_id(url_or_id: str) -> str:
+    """Return the 11-character video ID held in a bare ID or in a URL.
+
+    Raises TypeError for non-str input, and InvalidVideoIdException when no
+    11-character ID follows `v/`, `V/`, `be/`, `?v=`, `&v=` or `embed/`.
+    """
+    if not isinstance(url_or_id, str):
+        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
+    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
+        # Any string of exactly ID length is taken to be an ID and returned as-is.
+        return url_or_id
+    match = PATTERN.search(url_or_id)
+    # Group 4 is the ID candidate; it is non-optional, so a match always fills it.
+    video_id = match.group(4) if match else ''
+    if len(video_id) != YT_VIDEO_ID_LENGTH:
+        raise InvalidVideoIdException(url_or_id)
+    return video_id