Fix parsing info

This commit is contained in:
taizan-hokouto
2020-11-03 15:44:44 +09:00
parent a37602e666
commit 5eb8bdbd0e
3 changed files with 29 additions and 9 deletions

View File

@@ -94,10 +94,13 @@ class Runner:
path = util.checkpath(separated_path + video_id + '.html')
try:
info = VideoInfo(video_id)
except Exception as e:
except (PatternUnmatchError, JSONDecodeError) as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().save_error_data:
util.save(str(e), "ERR", ".dat")
util.save(str(e.doc), "ERR", ".dat")
continue
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
continue
print(f"\n"

View File

@@ -76,6 +76,6 @@ class PatternUnmatchError(VideoInfoParseError):
'''
Raised when video info cannot be parsed because the page text matches none of the expected patterns.
'''
def __init__(self, doc):
def __init__(self, doc=''):
self.msg = "PatternUnmatchError"
self.doc = doc

View File

@@ -8,8 +8,8 @@ from ..util.extract_video_id import extract_video_id
headers = config.headers
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
pattern2 = re.compile(r"yt\.setConfig\((\{[\s\S]*?\})\);")
item_channel_id = [
"videoDetails",
@@ -31,6 +31,10 @@ item_response = [
"embedded_player_response"
]
item_response2 = [
"PLAYER_VARS",
"embedded_player_response"
]
item_author_image = [
"videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer",
@@ -83,6 +87,7 @@ class VideoInfo:
def __init__(self, video_id):
self.video_id = extract_video_id(video_id)
self.client = httpx.Client(http2=True)
self.new_pattern_text = False
err = None
for _ in range(3):
try:
@@ -90,7 +95,6 @@ class VideoInfo:
self._parse(text)
break
except (InvalidVideoIdException, UnknownConnectionError) as e:
print(str(e))
raise e
except Exception as e:
err = e
@@ -118,12 +122,25 @@ class VideoInfo:
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError(doc=text)
result = re.search(pattern2, text)
if result is None:
raise PatternUnmatchError(doc=text)
else:
self.new_pattern_text = True
decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0]
response = self._get_item(res, item_response)
if self.new_pattern_text:
res = decoder.raw_decode(result.group(1))[0]
else:
res = decoder.raw_decode(result.group(1)[:-1])[0]
if self.new_pattern_text:
response = self._get_item(res, item_response2)
else:
response = self._get_item(res, item_response)
if response is None:
self._check_video_is_private(res.get("args"))
if self.new_pattern_text:
self._check_video_is_private(res.get("PLAYER_VARS"))
else:
self._check_video_is_private(res.get("args"))
self._renderer = self._get_item(json.loads(response), item_renderer)
if self._renderer is None:
raise InvalidVideoIdException(