Fix handling json decode error and pattern unmatch

This commit is contained in:
taizan-hokouto
2020-10-05 21:38:51 +09:00
parent 3106b3e545
commit ebf0e7c181
3 changed files with 29 additions and 32 deletions

View File

@@ -2,7 +2,6 @@ import argparse
import os
import signal
import time
from json.decoder import JSONDecodeError
from pathlib import Path
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
@@ -38,6 +37,7 @@ def main():
help='Save error data when error occurs(".dat" file)')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version')
Arguments(parser.parse_args().__dict__)
if Arguments().print_version:
@@ -48,39 +48,33 @@ def main():
if not Arguments().video_ids:
parser.print_help()
return
if not os.path.exists(Arguments().output):
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
return
for counter, video_id in enumerate(Arguments().video_ids):
if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '')
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try:
video_id = extract_video_id(video_id)
if not os.path.exists(Arguments().output):
raise FileNotFoundError
separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html')
err = None
for _ in range(3): # retry 3 times
try:
info = VideoInfo(video_id)
break
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
err = e
time.sleep(2)
continue
else:
print("Cannot parse video information.:{}".format(video_id))
try:
info = VideoInfo(video_id)
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().save_error_data:
util.save(err.doc, "ERR", ".dat")
util.save(str(e), "ERR", ".dat")
continue
print(f"\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
f" title: {info.get_title()}\n"
f" output path: {path}")
print(f" output path: {path}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id,
@@ -107,17 +101,12 @@ def main():
print("Invalid Video ID or URL:", video_id)
except NoContents as e:
print(e)
except FileNotFoundError:
print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e:
print(e.msg)
print("JSONDecodeError.:{}".format(video_id))
except (JSONDecodeError, PatternUnmatchError) as e:
print("{}:{}".format(e.msg, video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
util.save(e.doc, "ERR_", ".dat")
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
except PatternUnmatchError:
print(f"PatternUnmatchError [{video_id}]. ")
except Exception as e:
print(type(e), str(e))

View File

@@ -2,7 +2,7 @@ import httpx
import json
import re
import time
from httpx import ConnectError, NetworkError
from httpx import ConnectError, NetworkError, TimeoutException
from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id
@@ -83,16 +83,21 @@ class VideoInfo:
def __init__(self, video_id):
self.video_id = extract_video_id(video_id)
err = None
for _ in range(3):
try:
text = self._get_page_text(self.video_id)
self._parse(text)
break
except PatternUnmatchError:
except (InvalidVideoIdException, UnknownConnectionError) as e:
print(str(e))
raise e
except Exception as e:
err = e
time.sleep(2)
pass
else:
raise PatternUnmatchError("Pattern Unmatch")
raise err
def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}"
@@ -102,7 +107,7 @@ class VideoInfo:
resp = httpx.get(url, headers=headers)
resp.raise_for_status()
break
except (ConnectError, NetworkError) as e:
except (ConnectError, NetworkError, TimeoutException) as e:
err = e
time.sleep(3)
else:
@@ -113,7 +118,7 @@ class VideoInfo:
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError()
raise PatternUnmatchError(doc=text)
decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0]
response = self._get_item(res, item_response)

View File

@@ -8,6 +8,9 @@ YT_VIDEO_ID_LENGTH = 11
def extract_video_id(url_or_id: str) -> str:
ret = ''
if '[' in url_or_id:
url_or_id = url_or_id.replace('[', '').replace(']', '')
if type(url_or_id) != str:
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
if len(url_or_id) == YT_VIDEO_ID_LENGTH: