Fix handling json decode error and pattern unmatch
This commit is contained in:
@@ -2,7 +2,6 @@ import argparse
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import time
|
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
|
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
|
||||||
@@ -38,6 +37,7 @@ def main():
|
|||||||
help='Save error data when error occurs(".dat" file)')
|
help='Save error data when error occurs(".dat" file)')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Show version')
|
help='Show version')
|
||||||
|
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
|
|
||||||
if Arguments().print_version:
|
if Arguments().print_version:
|
||||||
@@ -48,39 +48,33 @@ def main():
|
|||||||
if not Arguments().video_ids:
|
if not Arguments().video_ids:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if not os.path.exists(Arguments().output):
|
||||||
|
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
|
||||||
|
return
|
||||||
|
|
||||||
for counter, video_id in enumerate(Arguments().video_ids):
|
for counter, video_id in enumerate(Arguments().video_ids):
|
||||||
if '[' in video_id:
|
|
||||||
video_id = video_id.replace('[', '').replace(']', '')
|
|
||||||
if len(Arguments().video_ids) > 1:
|
if len(Arguments().video_ids) > 1:
|
||||||
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
video_id = extract_video_id(video_id)
|
video_id = extract_video_id(video_id)
|
||||||
if not os.path.exists(Arguments().output):
|
|
||||||
raise FileNotFoundError
|
|
||||||
separated_path = str(Path(Arguments().output)) + os.path.sep
|
separated_path = str(Path(Arguments().output)) + os.path.sep
|
||||||
path = util.checkpath(separated_path + video_id + '.html')
|
path = util.checkpath(separated_path + video_id + '.html')
|
||||||
err = None
|
try:
|
||||||
for _ in range(3): # retry 3 times
|
info = VideoInfo(video_id)
|
||||||
try:
|
except Exception as e:
|
||||||
info = VideoInfo(video_id)
|
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
|
||||||
break
|
|
||||||
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
|
|
||||||
err = e
|
|
||||||
time.sleep(2)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
if Arguments().save_error_data:
|
||||||
util.save(err.doc, "ERR", ".dat")
|
util.save(str(e), "ERR", ".dat")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"\n"
|
print(f"\n"
|
||||||
f" video_id: {video_id}\n"
|
f" video_id: {video_id}\n"
|
||||||
f" channel: {info.get_channel_name()}\n"
|
f" channel: {info.get_channel_name()}\n"
|
||||||
f" title: {info.get_title()}")
|
f" title: {info.get_title()}\n"
|
||||||
|
f" output path: {path}")
|
||||||
|
|
||||||
print(f" output path: {path}")
|
|
||||||
duration = info.get_duration()
|
duration = info.get_duration()
|
||||||
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
||||||
ex = Extractor(video_id,
|
ex = Extractor(video_id,
|
||||||
@@ -107,17 +101,12 @@ def main():
|
|||||||
print("Invalid Video ID or URL:", video_id)
|
print("Invalid Video ID or URL:", video_id)
|
||||||
except NoContents as e:
|
except NoContents as e:
|
||||||
print(e)
|
print(e)
|
||||||
except FileNotFoundError:
|
except (JSONDecodeError, PatternUnmatchError) as e:
|
||||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
print("{}:{}".format(e.msg, video_id))
|
||||||
except JSONDecodeError as e:
|
|
||||||
print(e.msg)
|
|
||||||
print("JSONDecodeError.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
if Arguments().save_error_data:
|
||||||
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
util.save(e.doc, "ERR_", ".dat")
|
||||||
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
|
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
|
||||||
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
|
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
|
||||||
except PatternUnmatchError:
|
|
||||||
print(f"PatternUnmatchError [{video_id}]. ")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(type(e), str(e))
|
print(type(e), str(e))
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import httpx
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from httpx import ConnectError, NetworkError
|
from httpx import ConnectError, NetworkError, TimeoutException
|
||||||
from .. import config
|
from .. import config
|
||||||
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
|
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
|
||||||
from ..util.extract_video_id import extract_video_id
|
from ..util.extract_video_id import extract_video_id
|
||||||
@@ -83,16 +83,21 @@ class VideoInfo:
|
|||||||
|
|
||||||
def __init__(self, video_id):
|
def __init__(self, video_id):
|
||||||
self.video_id = extract_video_id(video_id)
|
self.video_id = extract_video_id(video_id)
|
||||||
|
err = None
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
try:
|
try:
|
||||||
text = self._get_page_text(self.video_id)
|
text = self._get_page_text(self.video_id)
|
||||||
self._parse(text)
|
self._parse(text)
|
||||||
break
|
break
|
||||||
except PatternUnmatchError:
|
except (InvalidVideoIdException, UnknownConnectionError) as e:
|
||||||
|
print(str(e))
|
||||||
|
raise e
|
||||||
|
except Exception as e:
|
||||||
|
err = e
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise PatternUnmatchError("Pattern Unmatch")
|
raise err
|
||||||
|
|
||||||
def _get_page_text(self, video_id):
|
def _get_page_text(self, video_id):
|
||||||
url = f"https://www.youtube.com/embed/{video_id}"
|
url = f"https://www.youtube.com/embed/{video_id}"
|
||||||
@@ -102,7 +107,7 @@ class VideoInfo:
|
|||||||
resp = httpx.get(url, headers=headers)
|
resp = httpx.get(url, headers=headers)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
break
|
break
|
||||||
except (ConnectError, NetworkError) as e:
|
except (ConnectError, NetworkError, TimeoutException) as e:
|
||||||
err = e
|
err = e
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
else:
|
else:
|
||||||
@@ -113,7 +118,7 @@ class VideoInfo:
|
|||||||
def _parse(self, text):
|
def _parse(self, text):
|
||||||
result = re.search(pattern, text)
|
result = re.search(pattern, text)
|
||||||
if result is None:
|
if result is None:
|
||||||
raise PatternUnmatchError()
|
raise PatternUnmatchError(doc=text)
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
||||||
response = self._get_item(res, item_response)
|
response = self._get_item(res, item_response)
|
||||||
|
|||||||
@@ -8,6 +8,9 @@ YT_VIDEO_ID_LENGTH = 11
|
|||||||
|
|
||||||
def extract_video_id(url_or_id: str) -> str:
|
def extract_video_id(url_or_id: str) -> str:
|
||||||
ret = ''
|
ret = ''
|
||||||
|
if '[' in url_or_id:
|
||||||
|
url_or_id = url_or_id.replace('[', '').replace(']', '')
|
||||||
|
|
||||||
if type(url_or_id) != str:
|
if type(url_or_id) != str:
|
||||||
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
|
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
|
||||||
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
|
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
|
||||||
|
|||||||
Reference in New Issue
Block a user