Compare commits

...

25 Commits

Author          SHA1        Message                                        Date
taizan-hokouto  3106b3e545  Merge branch 'hotfix/filepath'                 2020-10-04 11:33:58 +09:00
taizan-hokouto  50816a661d  Increment version                              2020-10-04 11:30:07 +09:00
taizan-hokouto  6755bc8bb2  Make sure to pass fixed filepath to processor  2020-10-04 11:29:52 +09:00
taizan-hokouto  26be989b9b  Merge branch 'hotfix/fix'                      2020-10-04 10:32:53 +09:00
taizan-hokouto  73ad0a1f44  Increment version                              2020-10-04 10:22:34 +09:00
taizan-hokouto  66b185ebf7  Fix constructing filepath                      2020-10-04 10:20:14 +09:00
taizan_hokuto   71650c39f7  Merge branch 'hotfix/fix'                      2020-10-03 22:42:48 +09:00
taizan_hokuto   488445c73b  Increment version                              2020-10-03 22:41:53 +09:00
taizan_hokuto   075e811efe  Delete unnecessary code                        2020-10-03 22:41:12 +09:00
taizan_hokuto   58d9bf7fdb  Merge branch 'hotfix/pattern'                  2020-10-03 22:35:46 +09:00
taizan_hokuto   b3e6275de7  Increment version                              2020-10-03 22:35:22 +09:00
taizan_hokuto   748778f545  Fix pattern matching                           2020-10-03 22:04:09 +09:00
taizan-hokuto   e29b3b8377  Merge branch 'hotfix/network'                  2020-09-14 00:40:40 +09:00
taizan-hokuto   0859ed5fb1  Increment version                              2020-09-14 00:29:21 +09:00
taizan-hokuto   a80d5ba080  Fix handling network error                     2020-09-14 00:28:41 +09:00
taizan-hokuto   b7e6043a71  Merge branch 'hotfix/memory'                   2020-09-12 02:12:46 +09:00
taizan-hokuto   820ba35013  Increment version                              2020-09-12 02:02:07 +09:00
taizan-hokuto   ecd2d130bf  Clear set each time the extraction changes     2020-09-12 01:57:55 +09:00
taizan-hokuto   f77a2c889b  Merge branch 'hotfix/not_quit'                 2020-09-12 00:57:48 +09:00
taizan-hokuto   47d5ab288f  Increment version                              2020-09-12 00:49:37 +09:00
taizan-hokuto   5f53fd24dd  Format                                         2020-09-12 00:48:40 +09:00
taizan-hokuto   11a9d0e2d7  Fix a problem with extraction not completing   2020-09-12 00:42:30 +09:00
taizan-hokuto   480c9e15b8  Merge branch 'hotfix/continue_error'           2020-09-11 00:21:07 +09:00
taizan-hokuto   35aa7636f6  Increment version                              2020-09-11 00:20:24 +09:00
taizan-hokuto   8fee67c2d4  Fix handling video info error                  2020-09-11 00:18:09 +09:00
9 changed files with 135 additions and 38 deletions

View File

@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
-__version__ = '0.2.1'
+__version__ = '0.3.0'
 __license__ = 'MIT'
 __author__ = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -2,11 +2,13 @@ import argparse
 import os
 import signal
+import time
 from json.decoder import JSONDecodeError
 from pathlib import Path
+from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
 from .arguments import Arguments
 from .progressbar import ProgressBar
-from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
+from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
 from .. processors.html_archiver import HTMLArchiver
 from .. tool.extract.extractor import Extractor
 from .. tool.videoinfo import VideoInfo
@@ -49,24 +51,39 @@ def main():
     for counter, video_id in enumerate(Arguments().video_ids):
         if '[' in video_id:
             video_id = video_id.replace('[', '').replace(']', '')
+        if len(Arguments().video_ids) > 1:
+            print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
         try:
             video_id = extract_video_id(video_id)
-            if os.path.exists(Arguments().output):
-                path = Path(Arguments().output + video_id + '.html')
-            else:
+            if not os.path.exists(Arguments().output):
                 raise FileNotFoundError
-            info = VideoInfo(video_id)
-            if len(Arguments().video_ids) > 1:
-                print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
+            separated_path = str(Path(Arguments().output)) + os.path.sep
+            path = util.checkpath(separated_path + video_id + '.html')
+            err = None
+            for _ in range(3):  # retry 3 times
+                try:
+                    info = VideoInfo(video_id)
+                    break
+                except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
+                    err = e
+                    time.sleep(2)
+                    continue
+            else:
+                print("Cannot parse video information.:{}".format(video_id))
+                if Arguments().save_error_data:
+                    util.save(err.doc, "ERR", ".dat")
+                continue
             print(f"\n"
                   f" video_id: {video_id}\n"
                   f" channel: {info.get_channel_name()}\n"
                   f" title: {info.get_title()}")
-            print(f" output path: {path.resolve()}")
+            print(f" output path: {path}")
             duration = info.get_duration()
             pbar = ProgressBar(total=(duration * 1000), status="Extracting")
             ex = Extractor(video_id,
                            callback=pbar._disp,
                            div=10)
             signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
@@ -74,7 +91,7 @@ def main():
             if data == []:
                 return False
             pbar.reset("#", "=", total=len(data), status="Rendering ")
-            processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
+            processor = HTMLArchiver(path, callback=pbar._disp)
             processor.process(
                 [{'video_id': None,
                   'timeout': 1,
@@ -86,8 +103,6 @@ def main():
             print()
             if pbar.is_cancelled():
                 print("\nThe extraction process has been discontinued.\n")
         except InvalidVideoIdException:
             print("Invalid Video ID or URL:", video_id)
         except NoContents as e:
@@ -96,14 +111,15 @@ def main():
             print("The specified directory does not exist.:{}".format(Arguments().output))
         except JSONDecodeError as e:
             print(e.msg)
-            print("Cannot parse video information.:{}".format(video_id))
+            print("JSONDecodeError.:{}".format(video_id))
             if Arguments().save_error_data:
                 util.save(e.doc, "ERR_JSON_DECODE", ".dat")
-        except PatternUnmatchError as e:
-            print(e.msg)
-            print("Cannot parse video information.:{}".format(video_id))
-            if Arguments().save_error_data:
-                util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
+        except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
+            print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
+        except PatternUnmatchError:
+            print(f"PatternUnmatchError [{video_id}]. ")
+        except Exception as e:
+            print(type(e), str(e))
     return
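
The VideoInfo retry added above leans on Python's for/else: the else branch runs only when the loop finishes all its iterations without hitting break, i.e. when every attempt failed. A minimal standalone sketch of the same pattern (fetch_info and ParseError are illustrative stand-ins, not pytchat code):

import time

class ParseError(Exception):
    """Stand-in for pytchat's PatternUnmatchError."""

def fetch_info(video_id):
    """Hypothetical flaky operation; always fails in this demo."""
    raise ParseError("could not parse video information")

err = None
for _ in range(3):              # retry up to 3 times
    try:
        info = fetch_info("abc123")
        break                   # success: the else clause is skipped
    except ParseError as e:
        err = e
        time.sleep(2)           # back off before the next attempt
else:
    # reached only if no attempt ever hit `break`
    print(f"giving up after 3 attempts: {err}")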

View File

@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
     '''
     Thrown when the video_id is not exist (VideoInfo).
     '''
-    pass
+    def __init__(self, doc):
+        self.msg = "InvalidVideoIdException"
+        self.doc = doc


 class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):

 class RetryExceedMaxCount(Exception):
     '''
-    thrown when the number of retries exceeds the maximum value.
+    Thrown when the number of retries exceeds the maximum value.
     '''
     pass
@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):

 class VideoInfoParseError(Exception):
     '''
-    thrown when failed to parse video info
+    Base exception when parsing video info.
     '''


 class PatternUnmatchError(VideoInfoParseError):
     '''
-    thrown when failed to parse video info with unmatched pattern
+    Thrown when failed to parse video info with unmatched pattern.
     '''
     def __init__(self, doc):
         self.msg = "PatternUnmatchError"

View File

@@ -1,9 +1,12 @@
+import httpx
 import os
 import re
-import httpx
+import time
 from base64 import standard_b64encode
+from httpx import NetworkError, ReadTimeout
 from .chat_processor import ChatProcessor
 from .default.processor import DefaultProcessor
+from ..exceptions import UnknownConnectionError


 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -112,7 +115,18 @@ class HTMLArchiver(ChatProcessor):
                    for item in message_items)

     def _encode_img(self, url):
-        resp = httpx.get(url)
+        err = None
+        for _ in range(5):
+            try:
+                resp = httpx.get(url, timeout=30)
+                break
+            except (NetworkError, ReadTimeout) as e:
+                print("Network Error. retrying...")
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
         return standard_b64encode(resp.content).decode()

     def _set_emoji_table(self, item: dict):
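
The retry wrapper above guards HTMLArchiver's emoji download; the archiver inlines each image by base64-encoding the response body. A rough sketch of that inlining step, with the retry stripped out and a placeholder URL (the data-URI assembly here is illustrative, not lifted from the archiver):

import httpx
from base64 import standard_b64encode

def encode_img(url: str) -> str:
    # Fetch the image and return its base64 text, as _encode_img does.
    resp = httpx.get(url, timeout=30)
    resp.raise_for_status()
    return standard_b64encode(resp.content).decode()

# Embedding the result in the archived HTML as a data URI:
b64 = encode_img("https://example.com/emoji.png")  # placeholder URL
img_tag = f'<img src="data:image/png;base64,{b64}" />'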

View File

@@ -8,14 +8,19 @@ from ... import config
 from ... paramgen import arcparam
 from ... exceptions import UnknownConnectionError
 from concurrent.futures import CancelledError
+from httpx import NetworkError, ReadTimeout
 from json import JSONDecodeError
 from urllib.parse import quote

 headers = config.headers
 REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
     "get_live_chat_replay?continuation="
 MAX_RETRY_COUNT = 3
+# Set to avoid duplicate parameters
+param_set = set()


 def _split(start, end, count, min_interval_sec=120):
     """
@@ -50,6 +55,7 @@ def _split(start, end, count, min_interval_sec=120):

 def ready_blocks(video_id, duration, div, callback):
+    param_set.clear()
     if div <= 0:
         raise ValueError
@@ -62,16 +68,24 @@ def ready_blocks(video_id, duration, div, callback):
     async def _create_block(session, video_id, seektime, callback):
         continuation = arcparam.getparam(video_id, seektime=seektime)
         url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
         for _ in range(MAX_RETRY_COUNT):
             try:
+                if continuation in param_set:
+                    next_continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                 resp = await session.get(url, headers=headers)
                 next_continuation, actions = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
         else:
             cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

         if actions:
             first = parser.get_offset(actions[0])
@@ -110,16 +124,24 @@ def fetch_patch(callback, blocks, video_id):
     async def _fetch(continuation, session) -> Patch:
         url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
         for _ in range(MAX_RETRY_COUNT):
             try:
+                if continuation in param_set:
+                    continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                 resp = await session.get(url, headers=config.headers)
                 continuation, actions = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
         else:
             cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

         if actions:
             last = parser.get_offset(actions[-1])
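
The module-level param_set deduplicates continuation parameters so the same chat segment is never requested twice, and ready_blocks() clears it so leftovers from a previous run cannot suppress valid fetches (the "Clear set each time the extraction changes" commit). A condensed sketch of the idea, with the network call stubbed out:

param_set = set()  # module level: shared by every fetch coroutine

def run_extraction(continuations):
    param_set.clear()  # reset per run; stale entries would skip real work
    results = []
    for cont in continuations:
        if cont in param_set:
            continue               # duplicate within this run: skip it
        param_set.add(cont)
        results.append(f"fetched:{cont}")  # stand-in for the HTTP request
    return results

print(run_extraction(["a", "b", "a"]))  # -> ['fetched:a', 'fetched:b']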

View File

@@ -93,4 +93,5 @@ class Extractor:
         return ret

     def cancel(self):
+        print("cancel")
         asyncdl.cancel()

View File

@@ -7,7 +7,6 @@ from typing import Tuple
 class ExtractWorker:
     """
     ExtractWorker associates a download session with a block.
-
     When the worker finishes fetching, the block
     being fetched is splitted and assigned the free worker.

View File

@@ -1,13 +1,16 @@
+import httpx
 import json
 import re
-import httpx
+import time
+from httpx import ConnectError, NetworkError
 from .. import config
-from ..exceptions import InvalidVideoIdException, PatternUnmatchError
+from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
 from ..util.extract_video_id import extract_video_id

 headers = config.headers
-pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
+pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")

 item_channel_id = [
     "videoDetails",
@@ -80,19 +83,37 @@ class VideoInfo:

     def __init__(self, video_id):
         self.video_id = extract_video_id(video_id)
-        text = self._get_page_text(self.video_id)
-        self._parse(text)
+        for _ in range(3):
+            try:
+                text = self._get_page_text(self.video_id)
+                self._parse(text)
+                break
+            except PatternUnmatchError:
+                time.sleep(2)
+                pass
+        else:
+            raise PatternUnmatchError("Pattern Unmatch")

     def _get_page_text(self, video_id):
         url = f"https://www.youtube.com/embed/{video_id}"
-        resp = httpx.get(url, headers=headers)
-        resp.raise_for_status()
+        err = None
+        for _ in range(3):
+            try:
+                resp = httpx.get(url, headers=headers)
+                resp.raise_for_status()
+                break
+            except (ConnectError, NetworkError) as e:
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
         return resp.text

     def _parse(self, text):
         result = re.search(pattern, text)
         if result is None:
-            raise PatternUnmatchError(text)
+            raise PatternUnmatchError()
         decoder = json.JSONDecoder()
         res = decoder.raw_decode(result.group(1)[:-1])[0]
         response = self._get_item(res, item_response)
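
The loosened PLAYER_CONFIG regex now accepts single or double quotes around the key and optional whitespace after the colon, where the old pattern demanded exactly 'PLAYER_CONFIG': with single quotes and a group ending in }}}. A quick check against made-up page fragments:

import re

old = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
new = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")

# Hypothetical embed-page fragments, for illustration only.
single = "'PLAYER_CONFIG': {\"args\": {\"a\": {\"b\": {}}}}"
double = '"PLAYER_CONFIG":{"args": {"a": {"b": {}}}}'

print(bool(old.search(single)), bool(old.search(double)))  # True False
print(bool(new.search(single)), bool(new.search(double)))  # True True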

View File

@@ -1,8 +1,12 @@
+import datetime
 import httpx
 import json
-import datetime
+import os
+import re
 from .. import config

+PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
+

 def extract(url):
     _session = httpx.Client(http2=True)
@@ -16,3 +20,21 @@ def save(data, filename, extention):
     with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
               mode='w', encoding='utf-8') as f:
         f.writelines(data)
+
+
+def checkpath(filepath):
+    splitter = os.path.splitext(os.path.basename(filepath))
+    body = splitter[0]
+    extention = splitter[1]
+    newpath = filepath
+    counter = 1
+    while os.path.exists(newpath):
+        match = re.search(PATTERN, body)
+        if match:
+            counter = int(match[2]) + 1
+            num_with_bracket = f'({str(counter)})'
+            body = f'{match[1]}{num_with_bracket}'
+        else:
+            body = f'{body}({str(counter)})'
+        newpath = os.path.join(os.path.dirname(filepath), body + extention)
+    return newpath
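
checkpath implements browser-style "(1)", "(2)" suffixing for colliding output files, and the PATTERN regex lets it bump an existing counter instead of stacking brackets. A small demo, assuming the function is importable as pytchat.util.checkpath:

import os
import tempfile
from pytchat.util import checkpath  # assumed import path

# Exercise the collision handling in a scratch directory.
with tempfile.TemporaryDirectory() as d:
    target = os.path.join(d, "abc.html")
    print(checkpath(target))        # no collision -> .../abc.html
    open(target, "w").close()       # now abc.html exists
    print(checkpath(target))        # -> .../abc(1).html
    open(os.path.join(d, "abc(1).html"), "w").close()
    print(checkpath(target))        # -> .../abc(2).html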