Make it possible to extract video id from url

This commit is contained in:
taizan-hokuto
2020-07-24 14:03:07 +09:00
parent 5ab653a1b2
commit 0abf8dd9f0
8 changed files with 54 additions and 16 deletions

View File

@@ -1,5 +1,6 @@
import argparse
from pathlib import Path
from pytchat.util.extract_video_id import extract_video_id
from .arguments import Arguments
from .. exceptions import InvalidVideoIdException, NoContents
from .. processors.html_archiver import HTMLArchiver
@@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
def main():
# Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
help='Video IDs separated by commas without space.\n'
# parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
# help='Video ID, or URL that includes id.\n'
# 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version')
help='Show version')
Arguments(parser.parse_args().__dict__)
if Arguments().print_version:
print(f'pytchat v{__version__}')
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
return
# Extractor
@@ -43,14 +47,16 @@ def main():
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
path = Path(Arguments().output + video_id + '.html')
print(f"output path: {path.resolve()}")
print(f" output path: {path.resolve()}")
Extractor(video_id,
processor=HTMLArchiver(
Arguments().output + video_id + '.html'),
callback=_disp_progress
).extract()
print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContents) as e:
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except (TypeError, NoContents) as e:
print(e)
return
parser.print_help()

View File

@@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton):
class Name:
VERSION: str = 'version'
OUTPUT: str = 'output'
VIDEO: str = 'video'
OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id'
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton):
self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = []
# Videos
if arguments[Arguments.Name.VIDEO]:
if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO].split(',')]
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]

View File

@@ -15,6 +15,7 @@ from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
@@ -86,7 +87,7 @@ class LiveChatAsync:
topchat_only=False,
logger=config.logger(__name__),
):
self._video_id = video_id
self._video_id = extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)

View File

@@ -14,6 +14,7 @@ from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
@@ -84,7 +85,7 @@ class LiveChat:
topchat_only=False,
logger=config.logger(__name__)
):
self._video_id = video_id
self._video_id = extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)

View File

@@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor):
super().__init__()
self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor()
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]

View File

@@ -3,6 +3,7 @@ from . import duplcheck
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException
from ... util.extract_video_id import extract_video_id
logger = config.logger(__name__)
headers = config.headers
@@ -14,7 +15,7 @@ class Extractor:
raise ValueError('div must be positive integer.')
elif div > 10:
div = 10
self.video_id = video_id
self.video_id = extract_video_id(video_id)
self.div = div
self.callback = callback
self.processor = processor

View File

@@ -3,6 +3,7 @@ import re
import requests
from .. import config
from ..exceptions import InvalidVideoIdException
from ..util.extract_video_id import extract_video_id
headers = config.headers
@@ -78,8 +79,8 @@ class VideoInfo:
'''
def __init__(self, video_id):
self.video_id = video_id
text = self._get_page_text(video_id)
self.video_id = extract_video_id(video_id)
text = self._get_page_text(self.video_id)
self._parse(text)
def _get_page_text(self, video_id):

View File

@@ -0,0 +1,25 @@
import re
from .. exceptions import InvalidVideoIdException
PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
YT_VIDEO_ID_LENGTH = 11
def extract_video_id(url_or_id: str) -> str:
ret = ''
if type(url_or_id) != str:
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
return url_or_id
match = re.search(PATTERN, url_or_id)
if match is None:
raise InvalidVideoIdException(url_or_id)
try:
ret = match.group(4)
except IndexError:
raise InvalidVideoIdException(url_or_id)
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(url_or_id)
return ret