Make it possible to extract video id from url
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import argparse
|
import argparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from pytchat.util.extract_video_id import extract_video_id
|
||||||
from .arguments import Arguments
|
from .arguments import Arguments
|
||||||
from .. exceptions import InvalidVideoIdException, NoContents
|
from .. exceptions import InvalidVideoIdException, NoContents
|
||||||
from .. processors.html_archiver import HTMLArchiver
|
from .. processors.html_archiver import HTMLArchiver
|
||||||
@@ -19,16 +20,19 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
|||||||
def main():
|
def main():
|
||||||
# Arguments
|
# Arguments
|
||||||
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
# parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
|
||||||
help='Video IDs separated by commas without space.\n'
|
# help='Video ID, or URL that includes id.\n'
|
||||||
|
# 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
|
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
|
||||||
|
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
|
||||||
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||||
help='Output directory (end with "/"). default="./"', default='./')
|
help='Output directory (end with "/"). default="./"', default='./')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Settings version')
|
help='Show version')
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
if Arguments().print_version:
|
if Arguments().print_version:
|
||||||
print(f'pytchat v{__version__}')
|
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
||||||
return
|
return
|
||||||
|
|
||||||
# Extractor
|
# Extractor
|
||||||
@@ -43,14 +47,16 @@ def main():
|
|||||||
f" channel: {info.get_channel_name()}\n"
|
f" channel: {info.get_channel_name()}\n"
|
||||||
f" title: {info.get_title()}")
|
f" title: {info.get_title()}")
|
||||||
path = Path(Arguments().output + video_id + '.html')
|
path = Path(Arguments().output + video_id + '.html')
|
||||||
print(f"output path: {path.resolve()}")
|
print(f" output path: {path.resolve()}")
|
||||||
Extractor(video_id,
|
Extractor(video_id,
|
||||||
processor=HTMLArchiver(
|
processor=HTMLArchiver(
|
||||||
Arguments().output + video_id + '.html'),
|
Arguments().output + video_id + '.html'),
|
||||||
callback=_disp_progress
|
callback=_disp_progress
|
||||||
).extract()
|
).extract()
|
||||||
print("\nExtraction end.\n")
|
print("\nExtraction end.\n")
|
||||||
except (InvalidVideoIdException, NoContents) as e:
|
except InvalidVideoIdException:
|
||||||
|
print("Invalid Video ID or URL:", video_id)
|
||||||
|
except (TypeError, NoContents) as e:
|
||||||
print(e)
|
print(e)
|
||||||
return
|
return
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ class Arguments(metaclass=Singleton):
|
|||||||
|
|
||||||
class Name:
|
class Name:
|
||||||
VERSION: str = 'version'
|
VERSION: str = 'version'
|
||||||
OUTPUT: str = 'output'
|
OUTPUT: str = 'output_dir'
|
||||||
VIDEO: str = 'video'
|
VIDEO_IDS: str = 'video_id'
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||||
@@ -35,6 +35,9 @@ class Arguments(metaclass=Singleton):
|
|||||||
self.output: str = arguments[Arguments.Name.OUTPUT]
|
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||||
self.video_ids: List[int] = []
|
self.video_ids: List[int] = []
|
||||||
# Videos
|
# Videos
|
||||||
if arguments[Arguments.Name.VIDEO]:
|
if arguments[Arguments.Name.VIDEO_IDS]:
|
||||||
self.video_ids = [video_id
|
self.video_ids = [video_id
|
||||||
for video_id in arguments[Arguments.Name.VIDEO].split(',')]
|
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from .. import exceptions
|
|||||||
from ..paramgen import liveparam, arcparam
|
from ..paramgen import liveparam, arcparam
|
||||||
from ..processors.default.processor import DefaultProcessor
|
from ..processors.default.processor import DefaultProcessor
|
||||||
from ..processors.combinator import Combinator
|
from ..processors.combinator import Combinator
|
||||||
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
MAX_RETRY = 10
|
MAX_RETRY = 10
|
||||||
@@ -86,7 +87,7 @@ class LiveChatAsync:
|
|||||||
topchat_only=False,
|
topchat_only=False,
|
||||||
logger=config.logger(__name__),
|
logger=config.logger(__name__),
|
||||||
):
|
):
|
||||||
self._video_id = video_id
|
self._video_id = extract_video_id(video_id)
|
||||||
self.seektime = seektime
|
self.seektime = seektime
|
||||||
if isinstance(processor, tuple):
|
if isinstance(processor, tuple):
|
||||||
self.processor = Combinator(processor)
|
self.processor = Combinator(processor)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from .. import exceptions
|
|||||||
from ..paramgen import liveparam, arcparam
|
from ..paramgen import liveparam, arcparam
|
||||||
from ..processors.default.processor import DefaultProcessor
|
from ..processors.default.processor import DefaultProcessor
|
||||||
from ..processors.combinator import Combinator
|
from ..processors.combinator import Combinator
|
||||||
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
MAX_RETRY = 10
|
MAX_RETRY = 10
|
||||||
@@ -84,7 +85,7 @@ class LiveChat:
|
|||||||
topchat_only=False,
|
topchat_only=False,
|
||||||
logger=config.logger(__name__)
|
logger=config.logger(__name__)
|
||||||
):
|
):
|
||||||
self._video_id = video_id
|
self._video_id = extract_video_id(video_id)
|
||||||
self.seektime = seektime
|
self.seektime = seektime
|
||||||
if isinstance(processor, tuple):
|
if isinstance(processor, tuple):
|
||||||
self.processor = Combinator(processor)
|
self.processor = Combinator(processor)
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.save_path = self._checkpath(save_path)
|
self.save_path = self._checkpath(save_path)
|
||||||
self.processor = DefaultProcessor()
|
self.processor = DefaultProcessor()
|
||||||
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||||
self.header = [HEADER_HTML]
|
self.header = [HEADER_HTML]
|
||||||
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from . import duplcheck
|
|||||||
from .. videoinfo import VideoInfo
|
from .. videoinfo import VideoInfo
|
||||||
from ... import config
|
from ... import config
|
||||||
from ... exceptions import InvalidVideoIdException
|
from ... exceptions import InvalidVideoIdException
|
||||||
|
from ... util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
logger = config.logger(__name__)
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
@@ -14,7 +15,7 @@ class Extractor:
|
|||||||
raise ValueError('div must be positive integer.')
|
raise ValueError('div must be positive integer.')
|
||||||
elif div > 10:
|
elif div > 10:
|
||||||
div = 10
|
div = 10
|
||||||
self.video_id = video_id
|
self.video_id = extract_video_id(video_id)
|
||||||
self.div = div
|
self.div = div
|
||||||
self.callback = callback
|
self.callback = callback
|
||||||
self.processor = processor
|
self.processor = processor
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import re
|
|||||||
import requests
|
import requests
|
||||||
from .. import config
|
from .. import config
|
||||||
from ..exceptions import InvalidVideoIdException
|
from ..exceptions import InvalidVideoIdException
|
||||||
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
|
|
||||||
@@ -78,8 +79,8 @@ class VideoInfo:
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, video_id):
|
def __init__(self, video_id):
|
||||||
self.video_id = video_id
|
self.video_id = extract_video_id(video_id)
|
||||||
text = self._get_page_text(video_id)
|
text = self._get_page_text(self.video_id)
|
||||||
self._parse(text)
|
self._parse(text)
|
||||||
|
|
||||||
def _get_page_text(self, video_id):
|
def _get_page_text(self, video_id):
|
||||||
|
|||||||
25
pytchat/util/extract_video_id.py
Normal file
25
pytchat/util/extract_video_id.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import re
|
||||||
|
from .. exceptions import InvalidVideoIdException
|
||||||
|
|
||||||
|
|
||||||
|
# Finds the run of ID characters that directly follows one of the URL markers
# "v/", "V/", "be/", "?v=", "&v=" or "embed/".  Groups 1-3 belong to the
# look-behind alternation; the candidate ID itself is capture group 4.
PATTERN = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
YT_VIDEO_ID_LENGTH = 11
# A bare ID may only contain the characters PATTERN accepts for an ID in a URL.
_BARE_ID = re.compile(r"[\w-]+")


def extract_video_id(url_or_id: str) -> str:
    """Return the video ID contained in ``url_or_id``.

    The argument may be either a bare video ID or a URL that embeds one
    after ``v/``, ``be/``, ``?v=``/``&v=`` or ``embed/``.

    Args:
        url_or_id: Bare video ID, or a URL that includes the ID.

    Returns:
        The ``YT_VIDEO_ID_LENGTH``-character video ID.

    Raises:
        TypeError: If ``url_or_id`` is not a ``str``.
        InvalidVideoIdException: If no ID of the expected length can be
            found in ``url_or_id``.
    """
    if not isinstance(url_or_id, str):
        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")

    # Only treat the input as a bare ID when every character is an ID
    # character; an 11-character URL fragment such as "youtu.be/ab" must go
    # through the URL path (and be rejected there) instead of being returned.
    if len(url_or_id) == YT_VIDEO_ID_LENGTH and _BARE_ID.fullmatch(url_or_id):
        return url_or_id

    match = PATTERN.search(url_or_id)
    if match is None:
        raise InvalidVideoIdException(url_or_id)

    # Group 4 is mandatory in PATTERN, so it is always a non-empty string
    # whenever a match exists; only its length still needs checking.
    candidate = match.group(4)
    if len(candidate) != YT_VIDEO_ID_LENGTH:
        raise InvalidVideoIdException(url_or_id)
    return candidate
|
||||||
Reference in New Issue
Block a user