Fix parsing info

This commit is contained in:
taizan-hokouto
2020-11-03 15:44:44 +09:00
parent a37602e666
commit 5eb8bdbd0e
3 changed files with 29 additions and 9 deletions

View File

@@ -94,10 +94,13 @@ class Runner:
path = util.checkpath(separated_path + video_id + '.html') path = util.checkpath(separated_path + video_id + '.html')
try: try:
info = VideoInfo(video_id) info = VideoInfo(video_id)
except Exception as e: except (PatternUnmatchError, JSONDecodeError) as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e))) print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().save_error_data: if Arguments().save_error_data:
util.save(str(e), "ERR", ".dat") util.save(str(e.doc), "ERR", ".dat")
continue
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
continue continue
print(f"\n" print(f"\n"

View File

@@ -76,6 +76,6 @@ class PatternUnmatchError(VideoInfoParseError):
''' '''
Thrown when failed to parse video info with unmatched pattern. Thrown when failed to parse video info with unmatched pattern.
''' '''
def __init__(self, doc): def __init__(self, doc=''):
self.msg = "PatternUnmatchError" self.msg = "PatternUnmatchError"
self.doc = doc self.doc = doc

View File

@@ -8,8 +8,8 @@ from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})") pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
pattern2 = re.compile(r"yt\.setConfig\((\{[\s\S]*?\})\);")
item_channel_id = [ item_channel_id = [
"videoDetails", "videoDetails",
@@ -31,6 +31,10 @@ item_response = [
"embedded_player_response" "embedded_player_response"
] ]
item_response2 = [
"PLAYER_VARS",
"embedded_player_response"
]
item_author_image = [ item_author_image = [
"videoDetails", "videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer", "embeddedPlayerOverlayVideoDetailsRenderer",
@@ -83,6 +87,7 @@ class VideoInfo:
def __init__(self, video_id): def __init__(self, video_id):
self.video_id = extract_video_id(video_id) self.video_id = extract_video_id(video_id)
self.client = httpx.Client(http2=True) self.client = httpx.Client(http2=True)
self.new_pattern_text = False
err = None err = None
for _ in range(3): for _ in range(3):
try: try:
@@ -90,7 +95,6 @@ class VideoInfo:
self._parse(text) self._parse(text)
break break
except (InvalidVideoIdException, UnknownConnectionError) as e: except (InvalidVideoIdException, UnknownConnectionError) as e:
print(str(e))
raise e raise e
except Exception as e: except Exception as e:
err = e err = e
@@ -118,12 +122,25 @@ class VideoInfo:
def _parse(self, text): def _parse(self, text):
result = re.search(pattern, text) result = re.search(pattern, text)
if result is None: if result is None:
raise PatternUnmatchError(doc=text) result = re.search(pattern2, text)
if result is None:
raise PatternUnmatchError(doc=text)
else:
self.new_pattern_text = True
decoder = json.JSONDecoder() decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0] if self.new_pattern_text:
response = self._get_item(res, item_response) res = decoder.raw_decode(result.group(1))[0]
else:
res = decoder.raw_decode(result.group(1)[:-1])[0]
if self.new_pattern_text:
response = self._get_item(res, item_response2)
else:
response = self._get_item(res, item_response)
if response is None: if response is None:
self._check_video_is_private(res.get("args")) if self.new_pattern_text:
self._check_video_is_private(res.get("PLAYER_VARS"))
else:
self._check_video_is_private(res.get("args"))
self._renderer = self._get_item(json.loads(response), item_renderer) self._renderer = self._get_item(json.loads(response), item_renderer)
if self._renderer is None: if self._renderer is None:
raise InvalidVideoIdException( raise InvalidVideoIdException(