From ebf0e7c1813f60a01a86f0cd18aff30eabe406c2 Mon Sep 17 00:00:00 2001
From: taizan-hokouto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Mon, 5 Oct 2020 21:38:51 +0900
Subject: [PATCH] Fix handling json decode error and pattern unmatch

---
 pytchat/cli/__init__.py          | 43 ++++++++++++--------------------
 pytchat/tool/videoinfo.py        | 15 +++++++----
 pytchat/util/extract_video_id.py |  3 +++
 3 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/pytchat/cli/__init__.py b/pytchat/cli/__init__.py
index 97a6442..b88f95a 100644
--- a/pytchat/cli/__init__.py
+++ b/pytchat/cli/__init__.py
@@ -2,7 +2,6 @@ import argparse
 
 import os
 import signal
-import time
 from json.decoder import JSONDecodeError
 from pathlib import Path
 from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
@@ -38,6 +37,7 @@ def main():
                         help='Save error data when error occurs(".dat" file)')
     parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
                         help='Show version')
+
     Arguments(parser.parse_args().__dict__)
 
     if Arguments().print_version:
@@ -48,39 +48,33 @@ def main():
     if not Arguments().video_ids:
         parser.print_help()
         return
+
+    if not os.path.exists(Arguments().output):
+        print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
+        return
+
     for counter, video_id in enumerate(Arguments().video_ids):
-        if '[' in video_id:
-            video_id = video_id.replace('[', '').replace(']', '')
         if len(Arguments().video_ids) > 1:
             print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
 
         try:
             video_id = extract_video_id(video_id)
-            if not os.path.exists(Arguments().output):
-                raise FileNotFoundError
             separated_path = str(Path(Arguments().output)) + os.path.sep
             path = util.checkpath(separated_path + video_id + '.html')
-            err = None
-            for _ in range(3):  # retry 3 times
-                try:
-                    info = VideoInfo(video_id)
-                    break
-                except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
-                    err = e
-                    time.sleep(2)
-                    continue
-            else:
-                print("Cannot parse video information.:{}".format(video_id))
+            try:
+                info = VideoInfo(video_id)
+            except Exception as e:
+                print("Cannot parse video information.:{} {}".format(video_id, type(e)))
                 if Arguments().save_error_data:
-                    util.save(err.doc, "ERR", ".dat")
+                    util.save(str(e), "ERR", ".dat")
                 continue
 
             print(f"\n"
                   f" video_id: {video_id}\n"
                   f" channel:  {info.get_channel_name()}\n"
-                  f" title:    {info.get_title()}")
+                  f" title:    {info.get_title()}\n"
+                  f" output path: {path}")
 
-            print(f" output path: {path}")
             duration = info.get_duration()
             pbar = ProgressBar(total=(duration * 1000), status="Extracting")
             ex = Extractor(video_id,
@@ -107,17 +101,12 @@ def main():
             print("Invalid Video ID or URL:", video_id)
         except NoContents as e:
             print(e)
-        except FileNotFoundError:
-            print("The specified directory does not exist.:{}".format(Arguments().output))
-        except JSONDecodeError as e:
-            print(e.msg)
-            print("JSONDecodeError.:{}".format(video_id))
+        except (JSONDecodeError, PatternUnmatchError) as e:
+            print("{}:{}".format(e.msg, video_id))
             if Arguments().save_error_data:
-                util.save(e.doc, "ERR_JSON_DECODE", ".dat")
+                util.save(e.doc, "ERR_", ".dat")
         except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
             print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
-        except PatternUnmatchError:
-            print(f"PatternUnmatchError [{video_id}]. ")
         except Exception as e:
             print(type(e), str(e))
 
diff --git a/pytchat/tool/videoinfo.py b/pytchat/tool/videoinfo.py
index 8510a5a..722a619 100644
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -2,7 +2,7 @@ import httpx
 import json
 import re
 import time
-from httpx import ConnectError, NetworkError
+from httpx import ConnectError, NetworkError, TimeoutException
 from .. import config
 from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
 from ..util.extract_video_id import extract_video_id
@@ -83,16 +83,21 @@ class VideoInfo:
 
     def __init__(self, video_id):
         self.video_id = extract_video_id(video_id)
+        err = None
         for _ in range(3):
             try:
                 text = self._get_page_text(self.video_id)
                 self._parse(text)
                 break
-            except PatternUnmatchError:
+            except (InvalidVideoIdException, UnknownConnectionError) as e:
+                print(str(e))
+                raise e
+            except Exception as e:
+                err = e
                 time.sleep(2)
                 pass
         else:
-            raise PatternUnmatchError("Pattern Unmatch")
+            raise err
 
     def _get_page_text(self, video_id):
         url = f"https://www.youtube.com/embed/{video_id}"
@@ -102,7 +107,7 @@ class VideoInfo:
                 resp = httpx.get(url, headers=headers)
                 resp.raise_for_status()
                 break
-            except (ConnectError, NetworkError) as e:
+            except (ConnectError, NetworkError, TimeoutException) as e:
                 err = e
                 time.sleep(3)
         else:
@@ -113,7 +118,7 @@ class VideoInfo:
     def _parse(self, text):
         result = re.search(pattern, text)
         if result is None:
-            raise PatternUnmatchError()
+            raise PatternUnmatchError(doc=text)
         decoder = json.JSONDecoder()
         res = decoder.raw_decode(result.group(1)[:-1])[0]
         response = self._get_item(res, item_response)
diff --git a/pytchat/util/extract_video_id.py b/pytchat/util/extract_video_id.py
index 75385f8..c62cd89 100644
--- a/pytchat/util/extract_video_id.py
+++ b/pytchat/util/extract_video_id.py
@@ -8,6 +8,9 @@ YT_VIDEO_ID_LENGTH = 11
 
 def extract_video_id(url_or_id: str) -> str:
     ret = ''
+    if '[' in url_or_id:
+        url_or_id = url_or_id.replace('[', '').replace(']', '')
+
     if type(url_or_id) != str:
         raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
     if len(url_or_id) == YT_VIDEO_ID_LENGTH: