# --- __init__.py: added as an empty file ---


# --- config/__init__.py ---

# Default HTTP request headers; the user-agent string is that of a desktop
# Chrome browser on Windows.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}


# --- exceptions.py ---

class InvalidVideoIdException(Exception):
    '''
    Raised when the video_id does not exist (VideoInfo).
    '''
    pass


class UnknownConnectionError(Exception):
    '''
    NOTE(review): not raised anywhere in the visible code; presumably meant
    for connection failures with no more specific cause - confirm.
    '''
    pass


# --- setup.py: added as an empty file ---


# --- util/__init__.py ---

import datetime


def save(data, filename, extention):
    '''
    Write data to a new UTF-8 text file with a timestamped name.

    The file is named "<filename>_<YYYY-MM-DD HH-MM-SS><extention>", using
    the local time at the moment of the call.

    Parameter
    ---------
    data : str or iterable of str
        Passed to file.writelines(), which adds no line separators.
    filename : str
        Path prefix of the output file.
    extention : str
        Suffix appended after the timestamp (spelling kept for callers that
        pass it by keyword).
    '''
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    path = filename + "_" + timestamp + extention
    with open(path, mode='w', encoding='utf-8') as f:
        f.writelines(data)


def get_item(dict_body, items: list):
    '''
    Walk a nested dict/list structure along a path of keys and indexes.

    Parameter
    ---------
    dict_body : dict, list or None
        Structure to descend into.
    items : list
        Keys (for dicts) and integer indexes (for lists), applied in order.

    Return
    ---------
    The value found at the end of the path, or None when a key is missing,
    an index is out of range, or a step does not fit the current node.
    '''
    for item in items:
        if dict_body is None:
            break
        if isinstance(dict_body, dict):
            dict_body = dict_body.get(item)
            continue
        # Check both bounds: a negative index below -len() would otherwise
        # pass a "len(...) > item" test and raise IndexError.
        if isinstance(item, int) and \
                isinstance(dict_body, list) and \
                -len(dict_body) <= item < len(dict_body):
            dict_body = dict_body[item]
            continue
        return None
    return dict_body


# --- yvi.py ---

import json
import re
import requests
import config
from exceptions import InvalidVideoIdException

headers = config.headers

# Captures the object literal assigned to 'PLAYER_CONFIG' in a
# yt.setConfig({...}); call; the capture is greedy and stays on one line
# because "." does not match newlines.
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")

# Path to the channel URL. The repeated keys are kept exactly as written;
# presumably they mirror the nesting of the source JSON - confirm.
item_channel_id = [
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "channelThumbnailEndpoint",
    "channelThumbnailEndpoint",
    "urlEndpoint",
    "urlEndpoint",
    "url"
]
+ +item_renderer = [ + "embedPreview", + "thumbnailPreviewRenderer" +] + +item_response = [ + "args", + "embedded_player_response" +] + +item_owner_image = [ + "videoDetails", + "embeddedPlayerOverlayVideoDetailsRenderer", + "channelThumbnail", + "thumbnails", + 0, + "url" +] + +item_thumbnail = [ + "defaultThumbnail", + "thumbnails", + 2, + "url" +] + +item_channel_name = [ + "videoDetails", + "embeddedPlayerOverlayVideoDetailsRenderer", + "expandedRenderer", + "embeddedPlayerOverlayVideoDetailsExpandedRenderer", + "title", + "runs", + 0, + "text" +] + +item_username = [ + "args", + "user_display_name", +] + +item_userimage = [ + "args", + "user_display_image", +] + + +item_moving_thumbnail = [ + "movingThumbnail", + "thumbnails", + 0, + "url" +] + + +class VideoInfo: + ''' + VideoInfo object retrieves YouTube video information. + + Parameter + --------- + video_id : str + + Exception + --------- + InvalidVideoIdException : + Occurs when video_id does not exist on YouTube. + ''' + + def __init__(self, video_id, session=None): + if session: + self.session = session + else: + self.session = requests.Session() + + self.video_id = video_id + text = self._get_page_text(video_id) + self._parse(text) + + def _get_page_text(self, video_id): + url = f"https://www.youtube.com/embed/{video_id}" + resp = self.session.get(url, headers=headers) + resp.raise_for_status() + return resp.text + + def _parse(self, text): + result = re.search(pattern, text) + res = json.loads(result.group(1)) + response = self._get_item(res, item_response) + if response is None: + self._check_video_is_private(res.get("args")) + self._renderer = self._get_item(json.loads(response), item_renderer) + if self._renderer is None: + raise InvalidVideoIdException( + f"No renderer found in video_id: [{self.video_id}].") + + def _check_video_is_private(self, args): + if args and args.get("video_id"): + raise InvalidVideoIdException( + f"video_id [{self.video_id}] is private or deleted.") + raise 
InvalidVideoIdException( + f"video_id [{self.video_id}] is invalid.") + + def _get_item(self, dict_body, items: list): + for item in items: + if dict_body is None: + break + if isinstance(dict_body, dict): + dict_body = dict_body.get(item) + continue + if isinstance(item, int) and \ + isinstance(dict_body, list) and \ + len(dict_body) > item: + dict_body = dict_body[item] + continue + return None + return dict_body + + def get_duration(self): + duration_seconds = self._renderer.get("videoDurationSeconds") + if duration_seconds: + '''Fetched value is string, so cast to integer.''' + return int(duration_seconds) + '''When key is not found, explicitly returns None.''' + return None + + def get_title(self): + if self._renderer.get("title"): + return [''.join(run["text"]) + for run in self._renderer["title"]["runs"]][0] + return None + + def get_channel_id(self): + channel_url = self._get_item(self._renderer, item_channel_id) + if channel_url: + return channel_url[9:] + return None + + def get_owner_image(self): + return self._get_item(self._renderer, item_owner_image) + + def get_thumbnail(self): + return self._get_item(self._renderer, item_thumbnail) + + def get_channel_name(self): + return self._get_item(self._renderer, item_channel_name) + + def get_username(self): + return self._get_item(self._renderer, item_username) + + def get_userimage(self): + return self._get_item(self._renderer, item_userimage) + + def get_moving_thumbnail(self): + return self._get_item(self._renderer, item_moving_thumbnail)