Make it possible to extract video id from url
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from pytchat.util.extract_video_id import extract_video_id
|
||||
from .arguments import Arguments
|
||||
from .. exceptions import InvalidVideoIdException, NoContents
|
||||
from .. processors.html_archiver import HTMLArchiver
|
||||
@@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
||||
def main():
|
||||
# Arguments
|
||||
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
||||
help='Video IDs separated by commas without space.\n'
|
||||
# parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
|
||||
# help='Video ID, or URL that includes id.\n'
|
||||
# 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
|
||||
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
|
||||
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||
help='Output directory (end with "/"). default="./"', default='./')
|
||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||
help='Settings version')
|
||||
help='Show version')
|
||||
Arguments(parser.parse_args().__dict__)
|
||||
if Arguments().print_version:
|
||||
print(f'pytchat v{__version__}')
|
||||
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
||||
return
|
||||
|
||||
# Extractor
|
||||
@@ -50,7 +54,9 @@ def main():
|
||||
callback=_disp_progress
|
||||
).extract()
|
||||
print("\nExtraction end.\n")
|
||||
except (InvalidVideoIdException, NoContents) as e:
|
||||
except InvalidVideoIdException:
|
||||
print("Invalid Video ID or URL:", video_id)
|
||||
except (TypeError, NoContents) as e:
|
||||
print(e)
|
||||
return
|
||||
parser.print_help()
|
||||
|
||||
@@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton):
|
||||
|
||||
class Name:
|
||||
VERSION: str = 'version'
|
||||
OUTPUT: str = 'output'
|
||||
VIDEO: str = 'video'
|
||||
OUTPUT: str = 'output_dir'
|
||||
VIDEO_IDS: str = 'video_id'
|
||||
|
||||
def __init__(self,
|
||||
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||
@@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton):
|
||||
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||
self.video_ids: List[int] = []
|
||||
# Videos
|
||||
if arguments[Arguments.Name.VIDEO]:
|
||||
if arguments[Arguments.Name.VIDEO_IDS]:
|
||||
self.video_ids = [video_id
|
||||
for video_id in arguments[Arguments.Name.VIDEO].split(',')]
|
||||
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from .. import exceptions
|
||||
from ..paramgen import liveparam, arcparam
|
||||
from ..processors.default.processor import DefaultProcessor
|
||||
from ..processors.combinator import Combinator
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
MAX_RETRY = 10
|
||||
@@ -86,7 +87,7 @@ class LiveChatAsync:
|
||||
topchat_only=False,
|
||||
logger=config.logger(__name__),
|
||||
):
|
||||
self._video_id = video_id
|
||||
self._video_id = extract_video_id(video_id)
|
||||
self.seektime = seektime
|
||||
if isinstance(processor, tuple):
|
||||
self.processor = Combinator(processor)
|
||||
|
||||
@@ -14,6 +14,7 @@ from .. import exceptions
|
||||
from ..paramgen import liveparam, arcparam
|
||||
from ..processors.default.processor import DefaultProcessor
|
||||
from ..processors.combinator import Combinator
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
MAX_RETRY = 10
|
||||
@@ -84,7 +85,7 @@ class LiveChat:
|
||||
topchat_only=False,
|
||||
logger=config.logger(__name__)
|
||||
):
|
||||
self._video_id = video_id
|
||||
self._video_id = extract_video_id(video_id)
|
||||
self.seektime = seektime
|
||||
if isinstance(processor, tuple):
|
||||
self.processor = Combinator(processor)
|
||||
|
||||
@@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor):
|
||||
super().__init__()
|
||||
self.save_path = self._checkpath(save_path)
|
||||
self.processor = DefaultProcessor()
|
||||
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||
self.header = [HEADER_HTML]
|
||||
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ from . import duplcheck
|
||||
from .. videoinfo import VideoInfo
|
||||
from ... import config
|
||||
from ... exceptions import InvalidVideoIdException
|
||||
from ... util.extract_video_id import extract_video_id
|
||||
|
||||
logger = config.logger(__name__)
|
||||
headers = config.headers
|
||||
@@ -14,7 +15,7 @@ class Extractor:
|
||||
raise ValueError('div must be positive integer.')
|
||||
elif div > 10:
|
||||
div = 10
|
||||
self.video_id = video_id
|
||||
self.video_id = extract_video_id(video_id)
|
||||
self.div = div
|
||||
self.callback = callback
|
||||
self.processor = processor
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
import requests
|
||||
from .. import config
|
||||
from ..exceptions import InvalidVideoIdException
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
|
||||
@@ -78,8 +79,8 @@ class VideoInfo:
|
||||
'''
|
||||
|
||||
def __init__(self, video_id):
|
||||
self.video_id = video_id
|
||||
text = self._get_page_text(video_id)
|
||||
self.video_id = extract_video_id(video_id)
|
||||
text = self._get_page_text(self.video_id)
|
||||
self._parse(text)
|
||||
|
||||
def _get_page_text(self, video_id):
|
||||
|
||||
25
pytchat/util/extract_video_id.py
Normal file
25
pytchat/util/extract_video_id.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import re
|
||||
from .. exceptions import InvalidVideoIdException
|
||||
|
||||
|
||||
PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
|
||||
YT_VIDEO_ID_LENGTH = 11
|
||||
|
||||
|
||||
def extract_video_id(url_or_id: str) -> str:
|
||||
ret = ''
|
||||
if type(url_or_id) != str:
|
||||
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
|
||||
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
|
||||
return url_or_id
|
||||
match = re.search(PATTERN, url_or_id)
|
||||
if match is None:
|
||||
raise InvalidVideoIdException(url_or_id)
|
||||
try:
|
||||
ret = match.group(4)
|
||||
except IndexError:
|
||||
raise InvalidVideoIdException(url_or_id)
|
||||
|
||||
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
|
||||
raise InvalidVideoIdException(url_or_id)
|
||||
return ret
|
||||
Reference in New Issue
Block a user