Make it possible to extract video id from url

2020-07-24 14:03:07 +09:00
parent 5ab653a1b2
commit 0abf8dd9f0
8 changed files with 54 additions and 16 deletions
--- a/pytchat/cli/init.py
+++ b/pytchat/cli/init.py
@@ -1,5 +1,6 @@
 import argparse
 from pathlib import Path
+from pytchat.util.extract_video_id import extract_video_id
 from .arguments import Arguments
 from .. exceptions import InvalidVideoIdException, NoContents
 from .. processors.html_archiver import HTMLArchiver
@@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
 def main():
    # Arguments
    parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
-    parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
-                        help='Video IDs separated by commas without space.\n'
+    # parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
+    #                     help='Video ID, or URL that includes id.\n'
+    #                     'If ID starts with a hyphen (-), enclose the ID in square brackets.')
+    parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
+                        help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
                        'If ID starts with a hyphen (-), enclose the ID in square brackets.')
    parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
                        help='Output directory (end with "/"). default="./"', default='./')
    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
-                        help='Settings version')
+                        help='Show version')
    Arguments(parser.parse_args().__dict__)
    if Arguments().print_version:
-        print(f'pytchat v{__version__}')
+        print(f'pytchat v{__version__}     © 2019 taizan-hokuto')
        return

    # Extractor
@@ -43,14 +47,16 @@ def main():
                      f" channel:  {info.get_channel_name()}\n"
                      f" title:    {info.get_title()}")
                path = Path(Arguments().output + video_id + '.html')
-                print(f"output path: {path.resolve()}")
+                print(f" output path: {path.resolve()}")
                Extractor(video_id,
                          processor=HTMLArchiver(
                              Arguments().output + video_id + '.html'),
                          callback=_disp_progress
                          ).extract()
                print("\nExtraction end.\n")
-            except (InvalidVideoIdException, NoContents) as e:
+            except InvalidVideoIdException:
+                print("Invalid Video ID or URL:", video_id)
+            except (TypeError, NoContents) as e:
                print(e)
        return
    parser.print_help()
--- a/pytchat/cli/arguments.py
+++ b/pytchat/cli/arguments.py
@@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton):

    class Name:
        VERSION: str = 'version'
-        OUTPUT: str = 'output'
-        VIDEO: str = 'video'
+        OUTPUT: str = 'output_dir'
+        VIDEO_IDS: str = 'video_id'

    def __init__(self,
                 arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton):
        self.output: str = arguments[Arguments.Name.OUTPUT]
        self.video_ids: List[int] = []
        # Videos
-        if arguments[Arguments.Name.VIDEO]:
+        if arguments[Arguments.Name.VIDEO_IDS]:
            self.video_ids = [video_id
-                              for video_id in arguments[Arguments.Name.VIDEO].split(',')]
+                              for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
+
+
+
--- a/pytchat/core_async/livechat.py
+++ b/pytchat/core_async/livechat.py
@@ -15,6 +15,7 @@ from .. import exceptions
 from ..paramgen import liveparam, arcparam
 from ..processors.default.processor import DefaultProcessor
 from ..processors.combinator import Combinator
+from ..util.extract_video_id import extract_video_id

 headers = config.headers
 MAX_RETRY = 10
@@ -86,7 +87,7 @@ class LiveChatAsync:
                 topchat_only=False,
                 logger=config.logger(__name__),
                 ):
-        self._video_id = video_id
+        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
--- a/pytchat/core_multithread/livechat.py
+++ b/pytchat/core_multithread/livechat.py
@@ -14,6 +14,7 @@ from .. import exceptions
 from ..paramgen import liveparam, arcparam
 from ..processors.default.processor import DefaultProcessor
 from ..processors.combinator import Combinator
+from ..util.extract_video_id import extract_video_id

 headers = config.headers
 MAX_RETRY = 10
@@ -84,7 +85,7 @@ class LiveChat:
                 topchat_only=False,
                 logger=config.logger(__name__)
                 ):
-        self._video_id = video_id
+        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        self.processor = DefaultProcessor()
-        self.emoji_table = {}  # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
+        self.emoji_table = {}  # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
        self.header = [HEADER_HTML]
        self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]

--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -3,6 +3,7 @@ from . import duplcheck
 from .. videoinfo import VideoInfo
 from ... import config
 from ... exceptions import InvalidVideoIdException
+from ... util.extract_video_id import extract_video_id

 logger = config.logger(__name__)
 headers = config.headers
@@ -14,7 +15,7 @@ class Extractor:
            raise ValueError('div must be positive integer.')
        elif div > 10:
            div = 10
-        self.video_id = video_id
+        self.video_id = extract_video_id(video_id)
        self.div = div
        self.callback = callback
        self.processor = processor
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -3,6 +3,7 @@ import re
 import requests
 from .. import config
 from ..exceptions import InvalidVideoIdException
+from ..util.extract_video_id import extract_video_id

 headers = config.headers

@@ -78,8 +79,8 @@ class VideoInfo:
    '''

    def __init__(self, video_id):
-        self.video_id = video_id
-        text = self._get_page_text(video_id)
+        self.video_id = extract_video_id(video_id)
+        text = self._get_page_text(self.video_id)
        self._parse(text)

    def _get_page_text(self, video_id):
--- a/pytchat/util/extract_video_id.py
+++ b/pytchat/util/extract_video_id.py
@@ -0,0 +1,25 @@
+import re
+from .. exceptions import InvalidVideoIdException
+
+
+PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
+YT_VIDEO_ID_LENGTH = 11
+
+
+def extract_video_id(url_or_id: str) -> str:
+    """Return the 11-character video ID found in ``url_or_id``.
+
+    A string that is already 11 characters long is returned unchanged;
+    otherwise the ID is taken from the first PATTERN match.
+    Raises TypeError for non-str input and InvalidVideoIdException when
+    no ID of the expected length can be extracted.
+    """
+    if not isinstance(url_or_id, str):
+        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
+    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
+        return url_or_id
+    match = PATTERN.search(url_or_id)
+    # Group 4 is the ID capture; groups 1-3 belong to the lookbehind alternatives.
+    if match is None or len(match.group(4)) != YT_VIDEO_ID_LENGTH:
+        raise InvalidVideoIdException(url_or_id)
+    return match.group(4)