from setuptools import setup, find_packages, Command
from os import path, system, remove, rename, removedirs
import re

package_name = "yvi"

# Every file below is resolved against the directory holding setup.py, so the
# build does not depend on the current working directory.
root_dir = path.abspath(path.dirname(__file__))


def _requirements():
    """Return the requirement specifiers listed in requirements.txt.

    Surrounding whitespace is stripped and empty lines are skipped so a
    trailing newline in the file does not yield an empty requirement.
    """
    with open(path.join(root_dir, 'requirements.txt'),
              encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def _read_meta(name, text):
    """Return the quoted value assigned to ``__<name>__`` in ``text``.

    Raises
    ------
    RuntimeError
        When no such assignment is found. This replaces the AttributeError
        that calling ``.group`` on a failed ``re.search`` would produce.
    """
    match = re.search(
        r'__' + name + r'__\s*=\s*[\'\"](.+?)[\'\"]', text)
    if match is None:
        raise RuntimeError(
            "__%s__ is not defined in %s/__init__.py" % (name, package_name))
    return match.group(1)


# Package metadata lives in <package>/__init__.py and is extracted with
# regular expressions, so the package itself is never imported here.
with open(path.join(root_dir, package_name, '__init__.py'),
          encoding='utf-8') as f:
    init_text = f.read()

version = _read_meta('version', init_text)
license = _read_meta('license', init_text)
author = _read_meta('author', init_text)
author_email = _read_meta('author_email', init_text)
url = _read_meta('url', init_text)

with open(path.join(root_dir, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()


setup(
    author=author,
    author_email=author_email,
    classifiers=[
        'Natural Language :: Japanese',
        'Development Status :: 4 - Beta',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'License :: OSI Approved :: MIT License',
    ],
    description="Retrieve youtube video info.",
    install_requires=_requirements(),
    keywords='youtube',
    license=license,
    long_description=long_description,
    long_description_content_type='text/markdown',
    name=package_name,
    packages=find_packages(exclude=['*log.txt', '*tests', '*testrun']),
    url=url,
    version=version,
)
import datetime


def save(data, filename, extention):
    """
    Write ``data`` to a new UTF-8 text file with a timestamped name.

    The file name is ``filename`` + "_" + the current time formatted as
    ``%Y-%m-%d %H-%M-%S`` + ``extention``.

    Parameters
    ----------
    data :
        Passed unchanged to ``file.writelines``.
    filename : str
        Leading part of the output file name.
    extention : str
        Appended verbatim to the name; no dot is inserted.
    """
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    target = filename + "_" + timestamp + extention
    with open(target, mode='w', encoding='utf-8') as f:
        f.writelines(data)


def get_item(dict_body, items: list):
    """
    Walk nested dicts/lists along ``items`` and return the value reached.

    Parameters
    ----------
    dict_body :
        Root object to start from (dict, list, or None).
    items : list
        Keys (looked up with ``dict.get``) and integer indexes (for lists),
        applied in order.

    Returns
    -------
    The value at the end of the path, or None as soon as a step cannot be
    taken: missing key, index out of range, or a container type that does
    not match the step. An empty ``items`` returns ``dict_body`` itself.
    """
    current = dict_body
    for key in items:
        if current is None:
            return None
        if isinstance(current, dict):
            current = current.get(key)
        elif (isinstance(key, int) and isinstance(current, list)
              and -len(current) <= key < len(current)):
            # The lower bound matters: without it an out-of-range negative
            # index would raise IndexError instead of yielding None.
            current = current[key]
        else:
            return None
    return current
# Locates the inline ``yt.setConfig({'PLAYER_CONFIG': {...}});`` call in the
# embed page and captures the object passed as the player config.
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")

# Key/index paths consumed by ``VideoInfo._get_item``.

# Resolved against the renderer; the repeated keys are kept exactly as the
# original path defines them.
item_channel_id = [
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "channelThumbnailEndpoint",
    "channelThumbnailEndpoint",
    "urlEndpoint",
    "urlEndpoint",
    "url"
]

# Resolved against the decoded embedded player response.
item_renderer = [
    "embedPreview",
    "thumbnailPreviewRenderer"
]

# Resolved against the player config; the value is itself a JSON string.
item_response = [
    "args",
    "embedded_player_response"
]

# Resolved against the renderer.
item_owner_image = [
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "channelThumbnail",
    "thumbnails",
    0,
    "url"
]

# Resolved against the renderer.
item_thumbnail = [
    "defaultThumbnail",
    "thumbnails",
    2,
    "url"
]

# Resolved against the renderer.
item_owner_name = [
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "expandedRenderer",
    "embeddedPlayerOverlayVideoDetailsExpandedRenderer",
    "title",
    "runs",
    0,
    "text"
]

# Resolved against the player config.
item_user_name = [
    "args",
    "user_display_name",
]

# Resolved against the player config.
item_user_image = [
    "args",
    "user_display_image",
]


class VideoInfo:
    '''
    Fetch the YouTube embed page of a video and expose values parsed from
    the player config embedded in that page.

    Parameter
    ---------
    video_id : str
        ID inserted into ``https://www.youtube.com/embed/{video_id}``.

    session : requests.Session, optional
        Session used for the request. A new one is created when omitted.

    Exception
    ---------
    InvalidVideoIdException :
        Raised when the player config, the embedded player response or the
        preview renderer cannot be found in the page.
    '''

    # The annotation is quoted so it is not evaluated when the class body
    # executes.
    def __init__(self, video_id, session: "requests.Session" = None):
        self.session = session if session else requests.Session()
        self.video_id = video_id
        text = self._get_page_text(video_id)
        self._parse(text)

    def _get_page_text(self, video_id):
        """Return the HTML of the embed page; raises on an HTTP error status."""
        # NOTE(review): the module imports `config` (which defines a
        # user-agent header) but nothing is passed here - confirm whether
        # the request was meant to send it.
        url = f"https://www.youtube.com/embed/{video_id}"
        resp = self.session.get(url)
        resp.raise_for_status()
        return resp.text

    def _parse(self, text):
        """
        Extract the player config and the preview renderer from page text.

        Sets ``self._res`` (decoded player config) and ``self._renderer``.

        Raises
        ------
        InvalidVideoIdException
            When the config call is not present in ``text``, when the
            config has no embedded player response, or when that response
            has no preview renderer.
        """
        match = pattern.search(text)
        if match is None:
            # Previously this surfaced as AttributeError on None.group().
            raise InvalidVideoIdException(
                f"No player config found in video_id: [{self.video_id}].")
        self._res = json.loads(match.group(1))
        response = self._get_item(self._res, item_response)
        if response is None:
            # Always raises; it only chooses the message.
            self._check_video_is_private(self._res.get("args"))

        self._renderer = self._get_item(json.loads(response), item_renderer)
        if self._renderer is None:
            raise InvalidVideoIdException(
                f"No renderer found in video_id: [{self.video_id}].")

    def _check_video_is_private(self, args):
        """
        Raise InvalidVideoIdException with a message chosen from ``args``.

        If ``args`` carries a ``video_id`` the video is reported as private
        or deleted, otherwise the id is reported as invalid. Never returns.
        """
        if args and args.get("video_id"):
            raise InvalidVideoIdException(
                f"video_id [{self.video_id}] is private or deleted.")
        raise InvalidVideoIdException(
            f"video_id [{self.video_id}] is invalid.")

    def _get_item(self, dict_body, items: list):
        """
        Walk nested dicts/lists along ``items``.

        Returns the value reached, or None as soon as a step cannot be
        taken (missing key, index out of range, mismatching container).
        """
        current = dict_body
        for key in items:
            if current is None:
                return None
            if isinstance(current, dict):
                current = current.get(key)
            elif (isinstance(key, int) and isinstance(current, list)
                  and -len(current) <= key < len(current)):
                # Lower bound added so an out-of-range negative index gives
                # None instead of IndexError.
                current = current[key]
            else:
                return None
        return current

    def get_duration(self):
        """Return ``videoDurationSeconds`` as int, or None when absent/empty."""
        duration_seconds = self._renderer.get("videoDurationSeconds")
        if duration_seconds:
            # The fetched value is a string, so cast it to an integer.
            return int(duration_seconds)
        # Key missing or empty: explicitly return None.
        return None

    def get_title(self):
        """
        Return the video title, or None when the renderer has no title.

        The title is stored as a list of text runs; all runs are
        concatenated so a title split into several runs is not truncated.
        """
        title = self._renderer.get("title")
        if not title:
            return None
        runs = title.get("runs")
        if not runs:
            return None
        return ''.join(run.get("text", "") for run in runs)

    def get_title_escaped(self):
        """Return the title with emoji removed (None when there is no title)."""
        return self._no_emoji(self.get_title())

    def get_channel_id(self):
        """Return the channel id taken from the channel URL, or None."""
        channel_url = self._get_item(self._renderer, item_channel_id)
        if channel_url:
            # Drops the first 9 characters - presumably the "/channel/"
            # prefix of the URL; TODO confirm against a live response.
            return channel_url[9:]
        return None

    def get_thumbnail(self):
        """Return the URL at index 2 of the default thumbnails, or None."""
        return self._get_item(self._renderer, item_thumbnail)

    def get_owner_image(self):
        """Return the first channel thumbnail URL, or None."""
        return self._get_item(self._renderer, item_owner_image)

    def get_owner_name(self):
        """Return the text of the first run of the owner title, or None."""
        return self._get_item(self._renderer, item_owner_name)

    def get_owner_name_escaped(self):
        """Return the owner name with emoji removed."""
        return self._no_emoji(self.get_owner_name())

    def get_user_name(self):
        """Return ``args.user_display_name`` from the player config, or None."""
        return self._get_item(self._res, item_user_name)

    def get_user_name_escaped(self):
        """Return the user name with emoji removed."""
        return self._no_emoji(self.get_user_name())

    def get_user_image(self):
        """Return ``args.user_display_image`` from the player config, or None."""
        return self._get_item(self._res, item_user_image)

    def _no_emoji(self, text: str):
        """Return ``text`` without emoji; None is passed through."""
        if text is None:
            return None
        # Newer releases of the emoji package expose replace_emoji() and no
        # longer provide UNICODE_EMOJI as a flat character table, so prefer
        # the function and fall back to the original per-character filter.
        replace_emoji = getattr(emoji, "replace_emoji", None)
        if replace_emoji is not None:
            return replace_emoji(text, '')
        return ''.join(c for c in text
                       if c not in emoji.UNICODE_EMOJI)


def get_info(video_id: str, session: "requests.Session" = None) -> VideoInfo:
    """
    Create a VideoInfo for ``video_id``.

    Parameters
    ----------
    video_id : str :
        video_id

    session : requests.Session
        session object; a new session is created when omitted.

    Returns
    -------
    VideoInfo
    """
    return VideoInfo(video_id=video_id, session=session)