From 5480e3e9edc64c39e3cc0ac196096c29e7e3fce7 Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Fri, 28 Feb 2020 01:04:18 +0900
Subject: [PATCH] Modify video info

---
 pytchat/tool/extract/extractor.py      |   2 +-
 pytchat/tool/mining/superchat_miner.py |   2 +-
 pytchat/tool/videoinfo.py              | 139 +++++++++++++++++++++----
 3 files changed, 120 insertions(+), 23 deletions(-)

diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py
index c53cda8..ba54394 100644
--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -24,7 +24,7 @@ class Extractor:
     def _get_duration_of_video(self, video_id):
         duration = 0
         try:
-            duration = VideoInfo(video_id).get("duration")
+            duration = VideoInfo(video_id).duration
         except InvalidVideoIdException:
             raise
         return duration
diff --git a/pytchat/tool/mining/superchat_miner.py b/pytchat/tool/mining/superchat_miner.py
index d6052a8..626c42e 100644
--- a/pytchat/tool/mining/superchat_miner.py
+++ b/pytchat/tool/mining/superchat_miner.py
@@ -54,7 +54,7 @@ class SuperChatMiner:
 def extract(video_id, div = 1, callback = None, processor = None):
     duration = 0
     try:
-        duration = VideoInfo(video_id).get("duration")
+        duration = VideoInfo(video_id).duration
     except InvalidVideoIdException:
         raise
     if duration == 0:
diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py
index 1ac8561..c959220 100644
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -7,36 +7,133 @@ from ..exceptions import InvalidVideoIdException
 headers = config.headers
 pattern=re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")
 
+item_channel_id =[
+    "videoDetails",
+    "embeddedPlayerOverlayVideoDetailsRenderer",
+    "channelThumbnailEndpoint",
+    "channelThumbnailEndpoint",
+    "urlEndpoint",
+    "urlEndpoint",
+    "url"
+]
+
+item_renderer = [
+    "embedPreview",
+    "thumbnailPreviewRenderer"
+]
+
+item_response = [
+    "args",
+    "embedded_player_response"
+]
+
+item_author_image =[
+    "videoDetails",
+    "embeddedPlayerOverlayVideoDetailsRenderer",
+    "channelThumbnail",
+    "thumbnails",
+    0,
+    "url"
+]
+
+
+item_thumbnail = [
+    "defaultThumbnail",
+    "thumbnails",
+    2,
+    "url"
+]
+
+item_channel_name = [
+    "videoDetails",
+    "embeddedPlayerOverlayVideoDetailsRenderer",
+    "expandedRenderer",
+    "embeddedPlayerOverlayVideoDetailsExpandedRenderer",
+    "title",
+    "runs",
+    0,
+    "text"
+]
+
+item_moving_thumbnail = [
+    "movingThumbnail",
+    "thumbnails",
+    0,
+    "url"
+]
+
 class VideoInfo:
     def __init__(self,video_id):
         self.video_id = video_id
-        self.info = self._get_info(video_id)
+        text = self._get_page_text(video_id)
+        self._parse(text)
+        self._get_attributes()
 
-    def _get_info(self,video_id):
+    def _get_attributes(self):
+        self.duration = self._duration()
+        self.channel_id = self._channel_id()
+        self.channel_name = self._channel_name()
+        self.thumbnail = self._thumbnail()
+        self.author_image = self._author_image()
+        self.title = self._title()
+        self.moving_thumbnail = self._moving_thumbnail()
+
+    def _get_page_text(self,video_id):
         url = f"https://www.youtube.com/embed/{video_id}"
         resp= requests.get(url, headers = headers)
         resp.raise_for_status()
-        return self._parse(resp.text)
+        return resp.text
 
-    def _parse(self,html):
-        result = re.search(pattern, html)
+    def _parse(self, text):
+        result = re.search(pattern, text)
         res= json.loads(result.group(1))
-        response = res["args"].get("embedded_player_response")
+        response = self._get_item(res, item_response)
         if response is None:
-            raise InvalidVideoIdException("動画IDが無効です。")
-        renderer = (json.loads(response))["embedPreview"]["thumbnailPreviewRenderer"]
-        return {
-            "duration": int(renderer["videoDurationSeconds"]) if renderer.get("videoDurationSeconds") else 0,
-            "title" : [''.join(run["text"]) for run in renderer["title"]["runs"]][0] if renderer.get("title") else None,
-            "channelId" : renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]["channelThumbnailEndpoint"]["channelThumbnailEndpoint"]["urlEndpoint"]["urlEndpoint"]["url"][9:] if renderer.get("videoDetails") else None,
-            "authorProfileImage" : renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]["channelThumbnail"]["thumbnails"][0]["url"] if renderer.get("videoDetails") else None,
-            "thumbnail" : renderer["defaultThumbnail"]["thumbnails"][2]["url"] if renderer.get("defaultThumbnail") else None,
-            "channelName" : renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]["expandedRenderer"]["embeddedPlayerOverlayVideoDetailsExpandedRenderer"]["title"]["runs"][0]["text"] if renderer.get("videoDetails") else None,
-            "movingThumbnail" : renderer["movingThumbnail"]["thumbnails"][0]["url"] if renderer.get("movingThumbnail") else None
-        }
+            raise InvalidVideoIdException(
+                f"Specified video_id [{self.video_id}] is invalid.")
+        self.renderer = self._get_item(json.loads(response), item_renderer)
+        if self.renderer is None:
+            raise InvalidVideoIdException(
+                f"No renderer found in video_id: [{self.video_id}].")
 
-    def get(self,item):
-        return self.info.get(item)
+    def _get_item(self, dict_body, items: list):
+        for item in items:
+            if dict_body is None:
+                break
+            if isinstance(dict_body, dict):
+                dict_body = dict_body.get(item)
+                continue
+            if isinstance(item, int) and \
+               isinstance(dict_body, list) and \
+               len(dict_body) > item:
+                dict_body = dict_body[item]
+                continue
+            return None
+        return dict_body
+
+    def _duration(self):
+        return int(self.renderer.get("videoDurationSeconds") or 0)
+
+    def _title(self):
+        if self.renderer.get("title"):
+            return [''.join(run["text"])
+                    for run in self.renderer["title"]["runs"]][0]
+        return None
+
+    def _channel_id(self):
+        channel_url = self._get_item(self.renderer, item_channel_id)
+        if channel_url:
+            return channel_url[9:]
+        return None
+
+    def _author_image(self):
+        return self._get_item(self.renderer, item_author_image)
+
+    def _thumbnail(self):
+        return self._get_item(self.renderer, item_thumbnail)
+
+    def _channel_name(self):
+        return self._get_item(self.renderer, item_channel_name)
 
-
-
+    def _moving_thumbnail(self):
+        return self._get_item(self.renderer, item_moving_thumbnail)
\ No newline at end of file