Make it possible to extract video id from url

This commit is contained in:
taizan-hokuto
2020-07-24 14:03:07 +09:00
parent 5ab653a1b2
commit 0abf8dd9f0
8 changed files with 54 additions and 16 deletions

View File

@@ -1,5 +1,6 @@
import argparse import argparse
from pathlib import Path from pathlib import Path
from pytchat.util.extract_video_id import extract_video_id
from .arguments import Arguments from .arguments import Arguments
from .. exceptions import InvalidVideoIdException, NoContents from .. exceptions import InvalidVideoIdException, NoContents
from .. processors.html_archiver import HTMLArchiver from .. processors.html_archiver import HTMLArchiver
@@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
def main(): def main():
# Arguments # Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, # parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
help='Video IDs separated by commas without space.\n' # help='Video ID, or URL that includes id.\n'
# 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.') 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./') help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version') help='Show version')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
if Arguments().print_version: if Arguments().print_version:
print(f'pytchat v{__version__}') print(f'pytchat v{__version__} © 2019 taizan-hokuto')
return return
# Extractor # Extractor
@@ -43,14 +47,16 @@ def main():
f" channel: {info.get_channel_name()}\n" f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}") f" title: {info.get_title()}")
path = Path(Arguments().output + video_id + '.html') path = Path(Arguments().output + video_id + '.html')
print(f"output path: {path.resolve()}") print(f" output path: {path.resolve()}")
Extractor(video_id, Extractor(video_id,
processor=HTMLArchiver( processor=HTMLArchiver(
Arguments().output + video_id + '.html'), Arguments().output + video_id + '.html'),
callback=_disp_progress callback=_disp_progress
).extract() ).extract()
print("\nExtraction end.\n") print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContents) as e: except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except (TypeError, NoContents) as e:
print(e) print(e)
return return
parser.print_help() parser.print_help()

View File

@@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton):
class Name: class Name:
VERSION: str = 'version' VERSION: str = 'version'
OUTPUT: str = 'output' OUTPUT: str = 'output_dir'
VIDEO: str = 'video' VIDEO_IDS: str = 'video_id'
def __init__(self, def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None): arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton):
self.output: str = arguments[Arguments.Name.OUTPUT] self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = [] self.video_ids: List[int] = []
# Videos # Videos
if arguments[Arguments.Name.VIDEO]: if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO].split(',')] for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]

View File

@@ -15,6 +15,7 @@ from .. import exceptions
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
@@ -86,7 +87,7 @@ class LiveChatAsync:
topchat_only=False, topchat_only=False,
logger=config.logger(__name__), logger=config.logger(__name__),
): ):
self._video_id = video_id self._video_id = extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)

View File

@@ -14,6 +14,7 @@ from .. import exceptions
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
@@ -84,7 +85,7 @@ class LiveChat:
topchat_only=False, topchat_only=False,
logger=config.logger(__name__) logger=config.logger(__name__)
): ):
self._video_id = video_id self._video_id = extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)

View File

@@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor):
super().__init__() super().__init__()
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor() self.processor = DefaultProcessor()
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary. self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML] self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)] self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]

View File

@@ -3,6 +3,7 @@ from . import duplcheck
from .. videoinfo import VideoInfo from .. videoinfo import VideoInfo
from ... import config from ... import config
from ... exceptions import InvalidVideoIdException from ... exceptions import InvalidVideoIdException
from ... util.extract_video_id import extract_video_id
logger = config.logger(__name__) logger = config.logger(__name__)
headers = config.headers headers = config.headers
@@ -14,7 +15,7 @@ class Extractor:
raise ValueError('div must be positive integer.') raise ValueError('div must be positive integer.')
elif div > 10: elif div > 10:
div = 10 div = 10
self.video_id = video_id self.video_id = extract_video_id(video_id)
self.div = div self.div = div
self.callback = callback self.callback = callback
self.processor = processor self.processor = processor

View File

@@ -3,6 +3,7 @@ import re
import requests import requests
from .. import config from .. import config
from ..exceptions import InvalidVideoIdException from ..exceptions import InvalidVideoIdException
from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
@@ -78,8 +79,8 @@ class VideoInfo:
''' '''
def __init__(self, video_id): def __init__(self, video_id):
self.video_id = video_id self.video_id = extract_video_id(video_id)
text = self._get_page_text(video_id) text = self._get_page_text(self.video_id)
self._parse(text) self._parse(text)
def _get_page_text(self, video_id): def _get_page_text(self, video_id):

View File

@@ -0,0 +1,25 @@
import re
from .. exceptions import InvalidVideoIdException
# Matches the run of id characters ([A-Za-z0-9_-]) that directly follows
# "v/", "V/", "be/", "?v=", "&v=" or "embed/" in a URL.
# Groups 1-3 belong to the lookbehind alternatives, so the id is group 4.
PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")

# Length a candidate must have to be accepted as a video id.
YT_VIDEO_ID_LENGTH = 11


def extract_video_id(url_or_id: str) -> str:
    """Return the video id contained in *url_or_id*.

    A string that is exactly ``YT_VIDEO_ID_LENGTH`` characters long is
    treated as an id already and returned unchanged. Anything else is
    searched with ``PATTERN`` and the first candidate found is returned,
    provided it has the expected length.

    Args:
        url_or_id: A bare video id, or a URL that embeds one.

    Returns:
        The video id.

    Raises:
        TypeError: If *url_or_id* is not a ``str``.
        InvalidVideoIdException: If no candidate is found, or the first
            candidate does not have the expected length.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are accepted as well.
    if not isinstance(url_or_id, str):
        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")

    # Fast path: the caller passed something of id length; return it as-is.
    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
        return url_or_id

    match = PATTERN.search(url_or_id)
    if match is None:
        raise InvalidVideoIdException(url_or_id)

    # Group 4 is a mandatory part of PATTERN, so it is always a non-empty
    # string here; only its length still needs checking.
    video_id = match.group(4)
    if len(video_id) != YT_VIDEO_ID_LENGTH:
        raise InvalidVideoIdException(url_or_id)
    return video_id