From 96474f10c6fc421f8091d02648504592fe0f469a Mon Sep 17 00:00:00 2001 From: Etian Daniel Alavardo Mtz <42478705+EtianAM@users.noreply.github.com> Date: Thu, 20 Aug 2020 22:29:59 -0500 Subject: [PATCH 1/4] Fix videoinfo.py A bit ugly, but I couldn't solve it any other way. I'm bad with regex. --- pytchat/tool/videoinfo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index a87e0c4..15bac14 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -7,7 +7,7 @@ from ..util.extract_video_id import extract_video_id headers = config.headers -pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);") +pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})") item_channel_id = [ "videoDetails", @@ -91,7 +91,8 @@ class VideoInfo: def _parse(self, text): result = re.search(pattern, text) - res = json.loads(result.group(1)) + result = result.group(1)[:-1] + res = json.loads(result) response = self._get_item(res, item_response) if response is None: self._check_video_is_private(res.get("args")) From 89b51c420f42a88a922fab2342008e690cd92a1e Mon Sep 17 00:00:00 2001 From: Etian Daniel Alavardo Mtz <42478705+EtianAM@users.noreply.github.com> Date: Thu, 20 Aug 2020 22:39:32 -0500 Subject: [PATCH 2/4] Avoid changing the type of result. However, if this argument is used elsewhere in the code it should be corrected. --- pytchat/tool/videoinfo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py index 15bac14..7b37771 100644 --- a/pytchat/tool/videoinfo.py +++ b/pytchat/tool/videoinfo.py @@ -91,8 +91,7 @@ class VideoInfo: def _parse(self, text): result = re.search(pattern, text) - result = result.group(1)[:-1] - res = json.loads(result) + res = json.loads(result.group(1)[:-1]) response = self._get_item(res, item_response) if response is None: self._check_video_is_private(res.get("args")) From 56294d6a67b6088c1ae22227ff085a904b7b7a11 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Fri, 21 Aug 2020 22:23:33 +0900 Subject: [PATCH 3/4] Fix extracting video_id --- pytchat/cli/__init__.py | 8 +++++--- pytchat/exceptions.py | 6 ++++++ pytchat/tool/extract/extractor.py | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py index 2c72db1..0d09de2 100644 --- a/pytchat/cli/__init__.py +++ b/pytchat/cli/__init__.py @@ -3,7 +3,7 @@ import os from pathlib import Path from pytchat.util.extract_video_id import extract_video_id from .arguments import Arguments -from .. exceptions import InvalidVideoIdException, NoContents +from .. exceptions import InvalidVideoIdException, NoContents, VideoInfoParseException from .. processors.html_archiver import HTMLArchiver from .. tool.extract.extractor import Extractor from .. tool.videoinfo import VideoInfo @@ -40,11 +40,11 @@ def main(): if '[' in video_id: video_id = video_id.replace('[', '').replace(']', '') try: + video_id = extract_video_id(video_id) if os.path.exists(Arguments().output): path = Path(Arguments().output + video_id + '.html') else: raise FileNotFoundError - video_id = extract_video_id(video_id) info = VideoInfo(video_id) print(f"Extracting...\n" f" video_id: {video_id}\n" @@ -63,7 +63,9 @@ def main(): except (TypeError, NoContents) as e: print(e) except FileNotFoundError: - print("The specified directory does not exist.:{}".format(Arguments().output )) + print("The specified directory does not exist.:{}".format(Arguments().output)) + except VideoInfoParseException: + print("Cannot parse video information.:{}".format(video_id)) return parser.print_help() diff --git a/pytchat/exceptions.py b/pytchat/exceptions.py index d8de40e..1c1793c 100644 --- a/pytchat/exceptions.py +++ b/pytchat/exceptions.py @@ -62,3 +62,9 @@ class ReceivedUnknownContinuation(ChatParseException): class FailedExtractContinuation(ChatDataFinished): pass + + +class VideoInfoParseException(Exception): + ''' + thrown when failed to parse video info + ''' diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py index 56bd8aa..4647f9d 100644 --- a/pytchat/tool/extract/extractor.py +++ b/pytchat/tool/extract/extractor.py @@ -79,7 +79,7 @@ class Extractor: def extract(self): if self.duration == 0: - print("video is not archived.") + print("\nCannot extract chat data:\n The specified video has not yet been archived.") return [] data = self._execute_extract_operations() if self.processor is None: From 6b924a88ef328f3bba2974d8ebd2e3765b6e627a Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Fri, 21 Aug 2020 22:25:06 +0900 Subject: [PATCH 4/4] Increment version --- pytchat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytchat/__init__.py b/pytchat/__init__.py index ccdd974..b5177cd 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,7 +2,7 @@ pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.1.3' +__version__ = '0.1.4' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'