Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39d99ad4af | ||
|
|
3675c91240 | ||
|
|
46258f625a | ||
|
|
2cc161b589 | ||
|
|
115277e5e1 | ||
|
|
ebf0e7c181 | ||
|
|
3106b3e545 | ||
|
|
50816a661d | ||
|
|
6755bc8bb2 | ||
|
|
26be989b9b | ||
|
|
73ad0a1f44 | ||
|
|
66b185ebf7 | ||
|
|
71650c39f7 | ||
|
|
488445c73b | ||
|
|
075e811efe | ||
|
|
58d9bf7fdb | ||
|
|
b3e6275de7 | ||
|
|
748778f545 | ||
|
|
e29b3b8377 | ||
|
|
0859ed5fb1 | ||
|
|
a80d5ba080 | ||
|
|
b7e6043a71 | ||
|
|
820ba35013 | ||
|
|
ecd2d130bf | ||
|
|
f77a2c889b | ||
|
|
47d5ab288f | ||
|
|
5f53fd24dd | ||
|
|
11a9d0e2d7 |
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.2.2'
|
__version__ = '0.3.2'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -1,13 +1,17 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
try:
|
||||||
|
from asyncio import CancelledError
|
||||||
|
except ImportError:
|
||||||
|
from asyncio.futures import CancelledError
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import time
|
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
|
||||||
from .arguments import Arguments
|
from .arguments import Arguments
|
||||||
from .progressbar import ProgressBar
|
from .progressbar import ProgressBar
|
||||||
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
|
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
|
||||||
from .. processors.html_archiver import HTMLArchiver
|
from .. processors.html_archiver import HTMLArchiver
|
||||||
from .. tool.extract.extractor import Extractor
|
from .. tool.extract.extractor import Extractor
|
||||||
from .. tool.videoinfo import VideoInfo
|
from .. tool.videoinfo import VideoInfo
|
||||||
@@ -37,6 +41,7 @@ def main():
|
|||||||
help='Save error data when error occurs(".dat" file)')
|
help='Save error data when error occurs(".dat" file)')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Show version')
|
help='Show version')
|
||||||
|
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
|
|
||||||
if Arguments().print_version:
|
if Arguments().print_version:
|
||||||
@@ -47,75 +52,106 @@ def main():
|
|||||||
if not Arguments().video_ids:
|
if not Arguments().video_ids:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
return
|
return
|
||||||
for counter, video_id in enumerate(Arguments().video_ids):
|
|
||||||
if '[' in video_id:
|
|
||||||
video_id = video_id.replace('[', '').replace(']', '')
|
|
||||||
try:
|
|
||||||
video_id = extract_video_id(video_id)
|
|
||||||
if os.path.exists(Arguments().output):
|
|
||||||
path = Path(Arguments().output + video_id + '.html')
|
|
||||||
else:
|
|
||||||
raise FileNotFoundError
|
|
||||||
err = None
|
|
||||||
for _ in range(3): # retry 3 times
|
|
||||||
try:
|
|
||||||
info = VideoInfo(video_id)
|
|
||||||
break
|
|
||||||
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
|
|
||||||
err = e
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
|
||||||
util.save(err.doc, "ERR", ".dat")
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
if not os.path.exists(Arguments().output):
|
||||||
|
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
Runner().run()
|
||||||
|
except CancelledError as e:
|
||||||
|
print(str(e))
|
||||||
|
|
||||||
|
|
||||||
|
class Runner:
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
ex = None
|
||||||
|
pbar = None
|
||||||
|
for counter, video_id in enumerate(Arguments().video_ids):
|
||||||
if len(Arguments().video_ids) > 1:
|
if len(Arguments().video_ids) > 1:
|
||||||
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
||||||
print(f"\n"
|
|
||||||
f" video_id: {video_id}\n"
|
|
||||||
f" channel: {info.get_channel_name()}\n"
|
|
||||||
f" title: {info.get_title()}")
|
|
||||||
|
|
||||||
print(f" output path: {path.resolve()}")
|
try:
|
||||||
duration = info.get_duration()
|
video_id = extract_video_id(video_id)
|
||||||
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
separated_path = str(Path(Arguments().output)) + os.path.sep
|
||||||
ex = Extractor(video_id,
|
path = util.checkpath(separated_path + video_id + '.html')
|
||||||
callback=pbar._disp,
|
try:
|
||||||
div=10)
|
info = VideoInfo(video_id)
|
||||||
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
except Exception as e:
|
||||||
data = ex.extract()
|
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
|
||||||
if data == []:
|
if Arguments().save_error_data:
|
||||||
return False
|
util.save(str(e), "ERR", ".dat")
|
||||||
pbar.reset("#", "=", total=len(data), status="Rendering ")
|
continue
|
||||||
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
|
|
||||||
processor.process(
|
|
||||||
[{'video_id': None,
|
|
||||||
'timeout': 1,
|
|
||||||
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
|
||||||
)
|
|
||||||
processor.finalize()
|
|
||||||
pbar.reset('#', '#', status='Completed ')
|
|
||||||
pbar.close()
|
|
||||||
print()
|
|
||||||
if pbar.is_cancelled():
|
|
||||||
print("\nThe extraction process has been discontinued.\n")
|
|
||||||
except InvalidVideoIdException:
|
|
||||||
print("Invalid Video ID or URL:", video_id)
|
|
||||||
except NoContents as e:
|
|
||||||
print(e)
|
|
||||||
except FileNotFoundError:
|
|
||||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
|
||||||
except JSONDecodeError as e:
|
|
||||||
print(e.msg)
|
|
||||||
print("JSONDecodeError.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
|
||||||
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
|
||||||
|
|
||||||
return
|
print(f"\n"
|
||||||
|
f" video_id: {video_id}\n"
|
||||||
|
f" channel: {info.get_channel_name()}\n"
|
||||||
|
f" title: {info.get_title()}\n"
|
||||||
|
f" output path: {path}")
|
||||||
|
|
||||||
|
duration = info.get_duration()
|
||||||
|
pbar = ProgressBar(total=(duration * 1000), status_txt="Extracting")
|
||||||
|
ex = Extractor(video_id,
|
||||||
|
callback=pbar.disp,
|
||||||
|
div=10)
|
||||||
|
signal.signal(signal.SIGINT, (lambda a, b: self.cancel(ex, pbar)))
|
||||||
|
|
||||||
|
data = ex.extract()
|
||||||
|
if data == []:
|
||||||
|
continue
|
||||||
|
pbar.reset("#", "=", total=len(data), status_txt="Rendering ")
|
||||||
|
processor = HTMLArchiver(path, callback=pbar.disp)
|
||||||
|
processor.process(
|
||||||
|
[{'video_id': None,
|
||||||
|
'timeout': 1,
|
||||||
|
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
||||||
|
)
|
||||||
|
processor.finalize()
|
||||||
|
pbar.reset('#', '#', status_txt='Completed ')
|
||||||
|
pbar.close()
|
||||||
|
print()
|
||||||
|
if pbar.is_cancelled():
|
||||||
|
print("\nThe extraction process has been discontinued.\n")
|
||||||
|
except InvalidVideoIdException:
|
||||||
|
print("Invalid Video ID or URL:", video_id)
|
||||||
|
except NoContents as e:
|
||||||
|
print(f"Abort:{str(e)}:[{video_id}]")
|
||||||
|
except (JSONDecodeError, PatternUnmatchError) as e:
|
||||||
|
print("{}:{}".format(e.msg, video_id))
|
||||||
|
if Arguments().save_error_data:
|
||||||
|
util.save(e.doc, "ERR_", ".dat")
|
||||||
|
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
|
||||||
|
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Abort:{str(type(e))} {str(e)[:80]}")
|
||||||
|
finally:
|
||||||
|
clear_tasks()
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def cancel(self, ex=None, pbar=None) -> None:
|
||||||
|
'''Called when keyboard interrupted has occurred.
|
||||||
|
'''
|
||||||
|
print("\nKeyboard interrupted.\n")
|
||||||
|
if ex and pbar:
|
||||||
|
ex.cancel()
|
||||||
|
pbar.cancel()
|
||||||
|
|
||||||
|
|
||||||
def cancel(ex, pbar):
|
def clear_tasks():
|
||||||
ex.cancel()
|
'''
|
||||||
pbar.cancel()
|
Clear remained tasks.
|
||||||
|
Called when internal exception has occurred or
|
||||||
|
after each extraction process is completed.
|
||||||
|
'''
|
||||||
|
async def _shutdown():
|
||||||
|
tasks = [t for t in asyncio.all_tasks()
|
||||||
|
if t is not asyncio.current_task()]
|
||||||
|
for task in tasks:
|
||||||
|
task.cancel()
|
||||||
|
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
loop.run_until_complete(_shutdown())
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
|||||||
@@ -9,21 +9,20 @@ import sys
|
|||||||
|
|
||||||
|
|
||||||
class ProgressBar:
|
class ProgressBar:
|
||||||
def __init__(self, total, status):
|
def __init__(self, total, status_txt):
|
||||||
self._bar_len = 60
|
self._bar_len = 60
|
||||||
self._cancelled = False
|
self._cancelled = False
|
||||||
self.reset(total=total, status=status)
|
self.reset(total=total, status_txt=status_txt)
|
||||||
self._blinker = 0
|
|
||||||
|
|
||||||
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
def reset(self, symbol_done="=", symbol_space=" ", total=100, status_txt=''):
|
||||||
self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
|
self._console_width = shutil.get_terminal_size(fallback=(80, 24)).columns
|
||||||
self._symbol_done = symbol_done
|
self._symbol_done = symbol_done
|
||||||
self._symbol_space = symbol_space
|
self._symbol_space = symbol_space
|
||||||
self._total = total
|
self._total = total
|
||||||
self._status = status
|
self._status_txt = status_txt
|
||||||
self._count = 0
|
self._count = 0
|
||||||
|
|
||||||
def _disp(self, _, fetched):
|
def disp(self, _, fetched):
|
||||||
self._progress(fetched, self._total)
|
self._progress(fetched, self._total)
|
||||||
|
|
||||||
def _progress(self, fillin, total):
|
def _progress(self, fillin, total):
|
||||||
@@ -39,11 +38,10 @@ class ProgressBar:
|
|||||||
|
|
||||||
bar = self._symbol_done * filled_len + \
|
bar = self._symbol_done * filled_len + \
|
||||||
self._symbol_space * (self._bar_len - filled_len)
|
self._symbol_space * (self._bar_len - filled_len)
|
||||||
disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
|
disp = f" [{bar}] {percents:>5.1f}% ...{self._status_txt} "[:self._console_width - 1] + '\r'
|
||||||
|
|
||||||
sys.stdout.write(disp)
|
sys.stdout.write(disp)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
self._blinker += 1
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if not self._cancelled:
|
if not self._cancelled:
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ class InvalidVideoIdException(Exception):
|
|||||||
self.doc = doc
|
self.doc = doc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class UnknownConnectionError(Exception):
|
class UnknownConnectionError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
|
import httpx
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import httpx
|
import time
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
|
from httpx import NetworkError, ReadTimeout
|
||||||
from .chat_processor import ChatProcessor
|
from .chat_processor import ChatProcessor
|
||||||
from .default.processor import DefaultProcessor
|
from .default.processor import DefaultProcessor
|
||||||
|
from ..exceptions import UnknownConnectionError
|
||||||
|
|
||||||
|
|
||||||
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
@@ -112,7 +115,18 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
for item in message_items)
|
for item in message_items)
|
||||||
|
|
||||||
def _encode_img(self, url):
|
def _encode_img(self, url):
|
||||||
resp = httpx.get(url)
|
err = None
|
||||||
|
for _ in range(5):
|
||||||
|
try:
|
||||||
|
resp = httpx.get(url, timeout=30)
|
||||||
|
break
|
||||||
|
except (NetworkError, ReadTimeout) as e:
|
||||||
|
print("Network Error. retrying...")
|
||||||
|
err = e
|
||||||
|
time.sleep(3)
|
||||||
|
else:
|
||||||
|
raise UnknownConnectionError(str(err))
|
||||||
|
|
||||||
return standard_b64encode(resp.content).decode()
|
return standard_b64encode(resp.content).decode()
|
||||||
|
|
||||||
def _set_emoji_table(self, item: dict):
|
def _set_emoji_table(self, item: dict):
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import httpx
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import httpx
|
||||||
|
import socket
|
||||||
from . import parser
|
from . import parser
|
||||||
from . block import Block
|
from . block import Block
|
||||||
from . worker import ExtractWorker
|
from . worker import ExtractWorker
|
||||||
@@ -8,14 +9,19 @@ from ... import config
|
|||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
from ... exceptions import UnknownConnectionError
|
from ... exceptions import UnknownConnectionError
|
||||||
from concurrent.futures import CancelledError
|
from concurrent.futures import CancelledError
|
||||||
|
from httpx import NetworkError, TimeoutException, ConnectError
|
||||||
from json import JSONDecodeError
|
from json import JSONDecodeError
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
||||||
"get_live_chat_replay?continuation="
|
"get_live_chat_replay?continuation="
|
||||||
MAX_RETRY_COUNT = 3
|
MAX_RETRY_COUNT = 3
|
||||||
|
|
||||||
|
# Set to avoid duplicate parameters
|
||||||
|
param_set = set()
|
||||||
|
|
||||||
|
|
||||||
def _split(start, end, count, min_interval_sec=120):
|
def _split(start, end, count, min_interval_sec=120):
|
||||||
"""
|
"""
|
||||||
@@ -50,6 +56,7 @@ def _split(start, end, count, min_interval_sec=120):
|
|||||||
|
|
||||||
|
|
||||||
def ready_blocks(video_id, duration, div, callback):
|
def ready_blocks(video_id, duration, div, callback):
|
||||||
|
param_set.clear()
|
||||||
if div <= 0:
|
if div <= 0:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
@@ -62,16 +69,24 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
async def _create_block(session, video_id, seektime, callback):
|
async def _create_block(session, video_id, seektime, callback):
|
||||||
continuation = arcparam.getparam(video_id, seektime=seektime)
|
continuation = arcparam.getparam(video_id, seektime=seektime)
|
||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
|
err = None
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
resp = await session.get(url, headers=headers)
|
if continuation in param_set:
|
||||||
|
next_continuation, actions = None, []
|
||||||
|
break
|
||||||
|
param_set.add(continuation)
|
||||||
|
resp = await session.get(url, headers=headers, timeout=10)
|
||||||
next_continuation, actions = parser.parse(resp.json())
|
next_continuation, actions = parser.parse(resp.json())
|
||||||
break
|
break
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
except (NetworkError, TimeoutException, ConnectError) as e:
|
||||||
|
err = e
|
||||||
|
await asyncio.sleep(3)
|
||||||
else:
|
else:
|
||||||
cancel()
|
cancel()
|
||||||
raise UnknownConnectionError("Abort: Unknown connection error.")
|
raise UnknownConnectionError("Abort:" + str(err))
|
||||||
|
|
||||||
if actions:
|
if actions:
|
||||||
first = parser.get_offset(actions[0])
|
first = parser.get_offset(actions[0])
|
||||||
@@ -110,16 +125,27 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
|
|
||||||
async def _fetch(continuation, session) -> Patch:
|
async def _fetch(continuation, session) -> Patch:
|
||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
|
err = None
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
|
if continuation in param_set:
|
||||||
|
continuation, actions = None, []
|
||||||
|
break
|
||||||
|
param_set.add(continuation)
|
||||||
resp = await session.get(url, headers=config.headers)
|
resp = await session.get(url, headers=config.headers)
|
||||||
continuation, actions = parser.parse(resp.json())
|
continuation, actions = parser.parse(resp.json())
|
||||||
break
|
break
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
except (NetworkError, TimeoutException, ConnectError) as e:
|
||||||
|
err = e
|
||||||
|
await asyncio.sleep(3)
|
||||||
|
except socket.error as error:
|
||||||
|
print("socket error", error.errno)
|
||||||
|
await asyncio.sleep(3)
|
||||||
else:
|
else:
|
||||||
cancel()
|
cancel()
|
||||||
raise UnknownConnectionError("Abort: Unknown connection error.")
|
raise UnknownConnectionError("Abort:" + str(err))
|
||||||
|
|
||||||
if actions:
|
if actions:
|
||||||
last = parser.get_offset(actions[-1])
|
last = parser.get_offset(actions[-1])
|
||||||
@@ -140,15 +166,10 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
|
|
||||||
|
|
||||||
async def _shutdown():
|
async def _shutdown():
|
||||||
print("\nshutdown...")
|
|
||||||
tasks = [t for t in asyncio.all_tasks()
|
tasks = [t for t in asyncio.all_tasks()
|
||||||
if t is not asyncio.current_task()]
|
if t is not asyncio.current_task()]
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
task.cancel()
|
task.cancel()
|
||||||
try:
|
|
||||||
await task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def cancel():
|
def cancel():
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from typing import Tuple
|
|||||||
class ExtractWorker:
|
class ExtractWorker:
|
||||||
"""
|
"""
|
||||||
ExtractWorker associates a download session with a block.
|
ExtractWorker associates a download session with a block.
|
||||||
|
|
||||||
When the worker finishes fetching, the block
|
When the worker finishes fetching, the block
|
||||||
being fetched is splitted and assigned the free worker.
|
being fetched is splitted and assigned the free worker.
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
|
import httpx
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import httpx
|
import time
|
||||||
|
from httpx import ConnectError, NetworkError, TimeoutException
|
||||||
from .. import config
|
from .. import config
|
||||||
from ..exceptions import InvalidVideoIdException, PatternUnmatchError
|
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
|
||||||
from ..util.extract_video_id import extract_video_id
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
headers = config.headers
|
|
||||||
|
|
||||||
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
|
headers = config.headers
|
||||||
|
|
||||||
|
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
|
||||||
|
|
||||||
item_channel_id = [
|
item_channel_id = [
|
||||||
"videoDetails",
|
"videoDetails",
|
||||||
@@ -80,19 +83,42 @@ class VideoInfo:
|
|||||||
|
|
||||||
def __init__(self, video_id):
|
def __init__(self, video_id):
|
||||||
self.video_id = extract_video_id(video_id)
|
self.video_id = extract_video_id(video_id)
|
||||||
text = self._get_page_text(self.video_id)
|
err = None
|
||||||
self._parse(text)
|
for _ in range(3):
|
||||||
|
try:
|
||||||
|
text = self._get_page_text(self.video_id)
|
||||||
|
self._parse(text)
|
||||||
|
break
|
||||||
|
except (InvalidVideoIdException, UnknownConnectionError) as e:
|
||||||
|
print(str(e))
|
||||||
|
raise e
|
||||||
|
except Exception as e:
|
||||||
|
err = e
|
||||||
|
time.sleep(2)
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise err
|
||||||
|
|
||||||
def _get_page_text(self, video_id):
|
def _get_page_text(self, video_id):
|
||||||
url = f"https://www.youtube.com/embed/{video_id}"
|
url = f"https://www.youtube.com/embed/{video_id}"
|
||||||
resp = httpx.get(url, headers=headers)
|
err = None
|
||||||
resp.raise_for_status()
|
for _ in range(3):
|
||||||
|
try:
|
||||||
|
resp = httpx.get(url, headers=headers)
|
||||||
|
resp.raise_for_status()
|
||||||
|
break
|
||||||
|
except (ConnectError, NetworkError, TimeoutException) as e:
|
||||||
|
err = e
|
||||||
|
time.sleep(3)
|
||||||
|
else:
|
||||||
|
raise UnknownConnectionError(str(err))
|
||||||
|
|
||||||
return resp.text
|
return resp.text
|
||||||
|
|
||||||
def _parse(self, text):
|
def _parse(self, text):
|
||||||
result = re.search(pattern, text)
|
result = re.search(pattern, text)
|
||||||
if result is None:
|
if result is None:
|
||||||
raise PatternUnmatchError(text)
|
raise PatternUnmatchError(doc=text)
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
||||||
response = self._get_item(res, item_response)
|
response = self._get_item(res, item_response)
|
||||||
|
|||||||
@@ -1,8 +1,12 @@
|
|||||||
|
import datetime
|
||||||
import httpx
|
import httpx
|
||||||
import json
|
import json
|
||||||
import datetime
|
import os
|
||||||
|
import re
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
|
||||||
|
|
||||||
def extract(url):
|
def extract(url):
|
||||||
_session = httpx.Client(http2=True)
|
_session = httpx.Client(http2=True)
|
||||||
@@ -16,3 +20,21 @@ def save(data, filename, extention):
|
|||||||
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
|
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
|
||||||
mode='w', encoding='utf-8') as f:
|
mode='w', encoding='utf-8') as f:
|
||||||
f.writelines(data)
|
f.writelines(data)
|
||||||
|
|
||||||
|
|
||||||
|
def checkpath(filepath):
|
||||||
|
splitter = os.path.splitext(os.path.basename(filepath))
|
||||||
|
body = splitter[0]
|
||||||
|
extention = splitter[1]
|
||||||
|
newpath = filepath
|
||||||
|
counter = 1
|
||||||
|
while os.path.exists(newpath):
|
||||||
|
match = re.search(PATTERN, body)
|
||||||
|
if match:
|
||||||
|
counter = int(match[2]) + 1
|
||||||
|
num_with_bracket = f'({str(counter)})'
|
||||||
|
body = f'{match[1]}{num_with_bracket}'
|
||||||
|
else:
|
||||||
|
body = f'{body}({str(counter)})'
|
||||||
|
newpath = os.path.join(os.path.dirname(filepath), body + extention)
|
||||||
|
return newpath
|
||||||
|
|||||||
@@ -8,6 +8,9 @@ YT_VIDEO_ID_LENGTH = 11
|
|||||||
|
|
||||||
def extract_video_id(url_or_id: str) -> str:
|
def extract_video_id(url_or_id: str) -> str:
|
||||||
ret = ''
|
ret = ''
|
||||||
|
if '[' in url_or_id:
|
||||||
|
url_or_id = url_or_id.replace('[', '').replace(']', '')
|
||||||
|
|
||||||
if type(url_or_id) != str:
|
if type(url_or_id) != str:
|
||||||
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
|
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
|
||||||
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
|
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
|
||||||
|
|||||||
Reference in New Issue
Block a user