Merge branch 'hotfix/fix'

Increment version
Delete unnecessary code
2020-10-03 22:42:48 +09:00 · 2020-10-03 22:41:53 +09:00 · 2020-10-03 22:41:12 +09:00 · 2020-10-03 22:35:46 +09:00 · 2020-10-03 22:35:22 +09:00 · 2020-10-03 22:04:09 +09:00
22 changed files with 236 additions and 539 deletions
--- a/pytchat/__init__.py
+++ b/pytchat/__init__.py
@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__    = 'Copyright (C) 2019 taizan-hokuto'
-__version__      = '0.1.6'
+__version__      = '0.2.7'
 __license__      = 'MIT'
 __author__       = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
--- a/pytchat/cli/__init__.py
+++ b/pytchat/cli/__init__.py
@@ -2,11 +2,13 @@ import argparse

 import os
 import signal
+import time
 from json.decoder import JSONDecodeError
 from pathlib import Path
+from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
 from .arguments import Arguments
 from .progressbar import ProgressBar
-from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
+from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
 from .. processors.html_archiver import HTMLArchiver
 from .. tool.extract.extractor import Extractor
 from .. tool.videoinfo import VideoInfo
@@ -32,11 +34,12 @@ def main():
                        'If ID starts with a hyphen (-), enclose the ID in square brackets.')
    parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
                        help='Output directory (end with "/"). default="./"', default='./')
-    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
-                        help='Show version')
    parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
                        help='Save error data when error occurs(".dat" file)')
+    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
+                        help='Show version')
    Arguments(parser.parse_args().__dict__)
+
    if Arguments().print_version:
        print(f'pytchat v{__version__}     © 2019 taizan-hokuto')
        return
@@ -45,56 +48,84 @@ def main():
    if not Arguments().video_ids:
        parser.print_help()
        return
-    for video_id in Arguments().video_ids:
+    for counter, video_id in enumerate(Arguments().video_ids):
        if '[' in video_id:
            video_id = video_id.replace('[', '').replace(']', '')
+        if len(Arguments().video_ids) > 1:
+            print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
+
        try:
            video_id = extract_video_id(video_id)
            if os.path.exists(Arguments().output):
-                path = Path(Arguments().output + video_id + '.html')
+                if Arguments().output[-1] != "/" or Arguments().output[-1] != "\\":
+                    Arguments().output = '/'.join([Arguments().output, os.path.sep])
+                path = util.checkpath(Path.resolve(Path(Arguments().output + video_id + '.html')))
            else:
                raise FileNotFoundError
+            err = None
+            for _ in range(3):  # retry 3 times
+                try:
                    info = VideoInfo(video_id)
-            print(f"Extracting...\n"
+                    break
+                except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
+                    err = e
+                    time.sleep(2)
+                    continue
+            else:
+                print("Cannot parse video information.:{}".format(video_id))
+                if Arguments().save_error_data:
+                    util.save(err.doc, "ERR", ".dat")
+                continue
+
+            print(f"\n"
                  f" video_id: {video_id}\n"
                  f" channel:  {info.get_channel_name()}\n"
                  f" title:    {info.get_title()}")

-            print(f" output path: {path.resolve()}")
+            print(f" output path: {path}")
            duration = info.get_duration()
-            pbar = ProgressBar(duration)
+            pbar = ProgressBar(total=(duration * 1000), status="Extracting")
            ex = Extractor(video_id,
-                    processor=HTMLArchiver(Arguments().output + video_id + '.html'),
                    callback=pbar._disp,
                    div=10)
            signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
-            ex.extract()
+            data = ex.extract()
+            if data == []:
+                return False
+            pbar.reset("#", "=", total=len(data), status="Rendering  ")
+            processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
+            processor.process(
+                [{'video_id': None,
+                'timeout': 1,
+                'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
+            )
+            processor.finalize()
+            pbar.reset('#', '#', status='Completed   ')
            pbar.close()
+            print()
            if pbar.is_cancelled():
                print("\nThe extraction process has been discontinued.\n")
-                return
-            print("\nThe extraction process has been completed.\n")
        except InvalidVideoIdException:
            print("Invalid Video ID or URL:", video_id)
-        except (TypeError, NoContents) as e:
-
-            print(e.with_traceback())
+        except NoContents as e:
+            print(e)
        except FileNotFoundError:
            print("The specified directory does not exist.:{}".format(Arguments().output))
        except JSONDecodeError as e:
            print(e.msg)
-            print("Cannot parse video information.:{}".format(video_id))
+            print("JSONDecodeError.:{}".format(video_id))
            if Arguments().save_error_data:
                util.save(e.doc, "ERR_JSON_DECODE", ".dat")
-        except PatternUnmatchError as e:
-            print(e.msg)
-            print("Cannot parse video information.:{}".format(video_id))
-            if Arguments().save_error_data:
-                util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
+        except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
+            print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
+        except PatternUnmatchError:
+            print(f"PatternUnmatchError [{video_id}]. ")
+        except Exception as e:
+            print(type(e), str(e))

    return


-def cancel(ex: Extractor, pbar: ProgressBar):
+def cancel(ex, pbar):
    ex.cancel()
    pbar.cancel()
--- a/pytchat/cli/arguments.py
+++ b/pytchat/cli/arguments.py
@@ -36,6 +36,7 @@ class Arguments(metaclass=Singleton):
        self.output: str = arguments[Arguments.Name.OUTPUT]
        self.video_ids: List[int] = []
        self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
+
        # Videos
        if arguments[Arguments.Name.VIDEO_IDS]:
            self.video_ids = [video_id
--- a/pytchat/cli/progressbar.py
+++ b/pytchat/cli/progressbar.py
@@ -4,35 +4,50 @@ vladignatyev/progress.py
 https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
 (MIT License)
 '''
+import shutil
 import sys


 class ProgressBar:
-    def __init__(self, duration):
-        self._duration = duration
-        self._count = 0
+    def __init__(self, total, status):
        self._bar_len = 60
        self._cancelled = False
+        self.reset(total=total, status=status)
+        self._blinker = 0
+        
+    def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
+        self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
+        self._symbol_done = symbol_done
+        self._symbol_space = symbol_space
+        self._total = total
+        self._status = status
+        self._count = 0

    def _disp(self, _, fetched):
-        self._progress(fetched / 1000, self._duration)
+        self._progress(fetched, self._total)

-    def _progress(self, fillin, total, status=''):
+    def _progress(self, fillin, total):
        if total == 0 or self._cancelled:
            return
        self._count += fillin
        filled_len = int(round(self._bar_len * self._count / float(total)))
        percents = round(100.0 * self._count / float(total), 1)
+        if percents > 100:
+            percents = 100.0
        if filled_len > self._bar_len:
            filled_len = self._bar_len
-            percents = 100
-        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
-        sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
+            
+        bar = self._symbol_done * filled_len + \
+              self._symbol_space * (self._bar_len - filled_len)
+        disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
+
+        sys.stdout.write(disp)
        sys.stdout.flush()
+        self._blinker += 1

    def close(self):
        if not self._cancelled:
-            self._progress(self._duration, self._duration)
+            self._progress(self._total, self._total)

    def cancel(self):
        self._cancelled = True
--- a/pytchat/exceptions.py
+++ b/pytchat/exceptions.py
@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
    '''
    Thrown when the video_id is not exist (VideoInfo).
    '''
-    pass
+    def __init__(self, doc):
+        self.msg = "InvalidVideoIdException"
+        self.doc = doc


 class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):

 class RetryExceedMaxCount(Exception):
    '''
-    thrown when the number of retries exceeds the maximum value.
+    Thrown when the number of retries exceeds the maximum value.
    '''
    pass

@@ -66,13 +68,13 @@ class FailedExtractContinuation(ChatDataFinished):

 class VideoInfoParseError(Exception):
    '''
-    thrown when failed to parse video info
+    Base exception when parsing video info.
    '''


 class PatternUnmatchError(VideoInfoParseError):
    '''
-    thrown when failed to parse video info with unmatched pattern
+    Thrown when failed to parse video info with unmatched pattern.
    '''
    def __init__(self, doc):
        self.msg = "PatternUnmatchError"
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -1,9 +1,12 @@
+import httpx
 import os
 import re
-import httpx
+import time
 from base64 import standard_b64encode
+from httpx import NetworkError, ReadTimeout
 from .chat_processor import ChatProcessor
 from .default.processor import DefaultProcessor
+from ..exceptions import UnknownConnectionError


 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -43,20 +46,21 @@ class HTMLArchiver(ChatProcessor):
    '''
    HTMLArchiver saves chat data as HTML table format.
    '''
-    def __init__(self, save_path):
+    def __init__(self, save_path, callback=None):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        self.processor = DefaultProcessor()
        self.emoji_table = {}  # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
        self.header = [HEADER_HTML]
        self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
+        self.callback = callback

    def _checkpath(self, filepath):
        splitter = os.path.splitext(os.path.basename(filepath))
        body = splitter[0]
        extention = splitter[1]
        newpath = filepath
-        counter = 0
+        counter = 1
        while os.path.exists(newpath):
            match = re.search(PATTERN, body)
            if match:
@@ -80,8 +84,9 @@ class HTMLArchiver(ChatProcessor):
        """
        if chat_components is None or len(chat_components) == 0:
            return
+        for c in self.processor.process(chat_components).items:
            self.body.extend(
-            (self._parse_html_line((
+                self._parse_html_line((
                    c.datetime,
                    c.elapsedTime,
                    c.author.name,
@@ -89,8 +94,10 @@ class HTMLArchiver(ChatProcessor):
                    c.amountString,
                    c.author.type,
                    c.author.channelId)
-            ) for c in self.processor.process(chat_components).items)
                )
+            )
+            if self.callback:
+                self.callback(None, 1)

    def _parse_html_line(self, raw_line):
        return ''.join(('<tr>',
@@ -108,7 +115,18 @@ class HTMLArchiver(ChatProcessor):
                       for item in message_items)

    def _encode_img(self, url):
-        resp = httpx.get(url)
+        err = None
+        for _ in range(5):
+            try:
+                resp = httpx.get(url, timeout=30)
+                break
+            except (NetworkError, ReadTimeout) as e:
+                print("Network Error. retrying...")
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
+
        return standard_b64encode(resp.content).decode()

    def _set_emoji_table(self, item: dict):
@@ -131,7 +149,7 @@ class HTMLArchiver(ChatProcessor):
    
    def finalize(self):
        self.header.extend([self._create_styles(), '</head>\n'])
-        self.body.extend(['</table>\n</body>'])
+        self.body.extend(['</table>\n</body>\n</html>'])
        with open(self.save_path, mode='a', encoding='utf-8') as f:
            f.writelines(self.header)
            f.writelines(self.body)
--- a/pytchat/tool/extract/asyncdl.py
+++ b/pytchat/tool/extract/asyncdl.py
@@ -8,14 +8,19 @@ from ... import config
 from ... paramgen import arcparam
 from ... exceptions import UnknownConnectionError
 from concurrent.futures import CancelledError
+from httpx import NetworkError, ReadTimeout
 from json import JSONDecodeError
 from urllib.parse import quote

+
 headers = config.headers
 REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
             "get_live_chat_replay?continuation="
 MAX_RETRY_COUNT = 3

+# Continuation parameters that have already been requested; checked before each fetch to avoid duplicate requests.
+param_set = set()
+

 def _split(start, end, count, min_interval_sec=120):
    """
@@ -50,6 +55,7 @@ def _split(start, end, count, min_interval_sec=120):


 def ready_blocks(video_id, duration, div, callback):
+    param_set.clear()
    if div <= 0:
        raise ValueError

@@ -62,16 +68,24 @@ def ready_blocks(video_id, duration, div, callback):
    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
        for _ in range(MAX_RETRY_COUNT):
            try:
+                if continuation in param_set:
+                    next_continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                resp = await session.get(url, headers=headers)
                next_continuation, actions = parser.parse(resp.json())
                break
            except JSONDecodeError:
                await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
        else:
            cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

        if actions:
            first = parser.get_offset(actions[0])
@@ -110,16 +124,24 @@ def fetch_patch(callback, blocks, video_id):

    async def _fetch(continuation, session) -> Patch:
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+        err = None
        for _ in range(MAX_RETRY_COUNT):
            try:
+                if continuation in param_set:
+                    continuation, actions = None, []
+                    break
+                param_set.add(continuation)
                resp = await session.get(url, headers=config.headers)
                continuation, actions = parser.parse(resp.json())
                break
            except JSONDecodeError:
                await asyncio.sleep(3)
+            except (NetworkError, ReadTimeout) as e:
+                err = e
+                await asyncio.sleep(3)
        else:
            cancel()
-            raise UnknownConnectionError("Abort: Unknown connection error.")
+            raise UnknownConnectionError("Abort:" + str(err))

        if actions:
            last = parser.get_offset(actions[-1])
--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -93,4 +93,5 @@ class Extractor:
        return ret

    def cancel(self):
+        print("cancel")
        asyncdl.cancel()
--- a/pytchat/tool/extract/parser.py
+++ b/pytchat/tool/extract/parser.py
@@ -42,10 +42,14 @@ def get_offset(item):


 def get_id(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].values())[0].get('id')
+    a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
+    if a:
+        return list(a.values())[0].get('id')
+    return None


 def get_type(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].keys())[0]
+    a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
+    if a:
+        return list(a.keys())[0]
+    return None
--- a/pytchat/tool/extract/worker.py
+++ b/pytchat/tool/extract/worker.py
@@ -7,7 +7,6 @@ from typing import Tuple
 class ExtractWorker:
    """
    ExtractWorker associates a download session with a block.
-
    When the worker finishes fetching, the block
    being fetched is splitted and assigned the free worker.

--- a/pytchat/tool/mining/__init__.py
+++ b/pytchat/tool/mining/__init__.py
--- a/pytchat/tool/mining/asyncdl.py
+++ b/pytchat/tool/mining/asyncdl.py
@@ -1,146 +0,0 @@
-
-import httpx
-import asyncio
-import json
-from . import parser
-from . block import Block
-from . worker import ExtractWorker
-from . patch import Patch
-from ... import config
-from ... paramgen import arcparam_mining as arcparam
-from concurrent.futures import CancelledError
-from urllib.parse import quote
-
-headers = config.headers
-REPLAY_URL = "https://www.youtube.com/live_chat_replay?continuation="
-INTERVAL = 1
-
-
-def _split(start, end, count, min_interval_sec=120):
-    """
-    Split section from `start` to `end` into `count` pieces,
-    and returns the beginning of each piece.
-    The `count` is adjusted so that the length of each piece
-    is no smaller than `min_interval`.
-
-    Returns:
-    --------
-        List of the offset of each block's first chat data.
-    """
-
-    if not (isinstance(start, int) or isinstance(start, float)) or \
-       not (isinstance(end, int) or isinstance(end, float)):
-        raise ValueError("start/end must be int or float")
-    if not isinstance(count, int):
-        raise ValueError("count must be int")
-    if start > end:
-        raise ValueError("end must be equal to or greater than start.")
-    if count < 1:
-        raise ValueError("count must be equal to or greater than 1.")
-    if (end - start) / count < min_interval_sec:
-        count = int((end - start) / min_interval_sec)
-        if count == 0:
-            count = 1
-    interval = (end - start) / count
-
-    if count == 1:
-        return [start]
-    return sorted(list(set([int(start + interval * j)
-                            for j in range(count)])))
-
-
-def ready_blocks(video_id, duration, div, callback):
-    if div <= 0:
-        raise ValueError
-
-    async def _get_blocks(video_id, duration, div, callback):
-        async with httpx.ClientSession() as session:
-            tasks = [_create_block(session, video_id, seektime, callback)
-                     for seektime in _split(0, duration, div)]
-            return await asyncio.gather(*tasks)
-
-    async def _create_block(session, video_id, seektime, callback):
-        continuation = arcparam.getparam(video_id, seektime=seektime)
-        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
-               f"{int(seektime*1000)}&hidden=false&pbj=1")
-        async with session.get(url, headers=headers) as resp:
-            chat_json = await resp.text()
-        if chat_json is None:
-            return
-        continuation, actions = parser.parse(json.loads(chat_json)[1])
-        first = seektime
-        seektime += INTERVAL
-        if callback:
-            callback(actions, INTERVAL)
-        return Block(
-            continuation=continuation,
-            chat_data=actions,
-            first=first,
-            last=seektime,
-            seektime=seektime
-        )
-    """
-    fetch initial blocks.
-    """
-    loop = asyncio.get_event_loop()
-    blocks = loop.run_until_complete(
-        _get_blocks(video_id, duration, div, callback))
-    return blocks
-
-
-def fetch_patch(callback, blocks, video_id):
-
-    async def _allocate_workers():
-        workers = [
-            ExtractWorker(
-                fetch=_fetch, block=block,
-                blocks=blocks, video_id=video_id
-            )
-            for block in blocks
-        ]
-        async with httpx.ClientSession() as session:
-            tasks = [worker.run(session) for worker in workers]
-            return await asyncio.gather(*tasks)
-
-    async def _fetch(seektime, session) -> Patch:
-        continuation = arcparam.getparam(video_id, seektime=seektime)
-        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
-               f"{int(seektime*1000)}&hidden=false&pbj=1")
-        async with session.get(url, headers=config.headers) as resp:
-            chat_json = await resp.text()
-        actions = []
-        try:
-            if chat_json is None:
-                return Patch()
-            continuation, actions = parser.parse(json.loads(chat_json)[1])
-        except json.JSONDecodeError:
-            pass
-        if callback:
-            callback(actions, INTERVAL)
-        return Patch(chats=actions, continuation=continuation,
-                     seektime=seektime, last=seektime)
-    """
-    allocate workers and assign blocks.
-    """
-    loop = asyncio.get_event_loop()
-    try:
-        loop.run_until_complete(_allocate_workers())
-    except CancelledError:
-        pass
-
-
-async def _shutdown():
-    print("\nshutdown...")
-    tasks = [t for t in asyncio.all_tasks()
-             if t is not asyncio.current_task()]
-    for task in tasks:
-        task.cancel()
-        try:
-            await task
-        except asyncio.CancelledError:
-            pass
-
-
-def cancel():
-    loop = asyncio.get_event_loop()
-    loop.create_task(_shutdown())
--- a/pytchat/tool/mining/block.py
+++ b/pytchat/tool/mining/block.py
@@ -1,62 +0,0 @@
-from . import parser
-class Block:
-    """Block object represents something like a box 
-    to join chunk of chatdata.
-
-    Parameter:
-    ---------
-    first : int :
-        videoOffsetTimeMs of the first chat_data 
-        (chat_data[0])
-        
-    last : int :
-        videoOffsetTimeMs of the last chat_data.
-        (chat_data[-1])
-
-        this value increases as fetching chatdata progresses.
-
-    end : int :
-        target videoOffsetTimeMs of last chat data for extract,
-        equals to first videoOffsetTimeMs of next block.
-        when extract worker reaches this offset, stop fetching.
-
-    continuation : str :
-        continuation param of last chat data.
-
-    chat_data : list 
-
-    done : bool :
-        whether this block has been fetched.
-    
-    remaining : int :
-        remaining data to extract.
-        equals end - last.
-    
-    is_last : bool :
-        whether this block is the last one in blocklist.
-
-    during_split : bool :
-        whether this block is in the process of during_split.
-        while True, this block is excluded from duplicate split procedure.
-    
-    seektime : float :
-        the last position of this block(seconds) already fetched.
-    """
-    
-    __slots__ = ['first','last','end','continuation','chat_data','remaining',
-        'done','is_last','during_split','seektime']
-
-    def __init__(self, first = 0, last = 0, end = 0,
-                continuation = '', chat_data = [], is_last = False,
-                during_split = False, seektime = None):
-        self.first = first
-        self.last = last
-        self.end = end
-        self.continuation = continuation
-        self.chat_data = chat_data
-        self.done = False
-        self.remaining = self.end - self.last
-        self.is_last = is_last
-        self.during_split = during_split
-        self.seektime = seektime
- 
--- a/pytchat/tool/mining/parser.py
+++ b/pytchat/tool/mining/parser.py
@@ -1,73 +0,0 @@
-import re
-from ... import config
-from ... exceptions import (
-    ResponseContextError,
-    NoContents, NoContinuation)
-
-logger = config.logger(__name__)
-
-
-def parse(jsn):
-    """
-    Parse replay chat data.
-    Parameter:
-    ----------
-    jsn : dict
-        JSON of replay chat data.
-    Returns:
-    ------
-        continuation : str
-        actions : list
-
-    """
-    if jsn is None:
-        raise ValueError("parameter JSON is None")
-    if jsn['response']['responseContext'].get('errors'):
-        raise ResponseContextError(
-            'video_id is invalid or private/deleted.')
-    contents = jsn["response"].get('continuationContents')
-    if contents is None:
-        raise NoContents('No chat data.')
-
-    cont = contents['liveChatContinuation']['continuations'][0]
-    if cont is None:
-        raise NoContinuation('No Continuation')
-    metadata = cont.get('liveChatReplayContinuationData')
-    if metadata:
-        continuation = metadata.get("continuation")
-        actions = contents['liveChatContinuation'].get('actions')
-        if continuation:
-            return continuation, [action["replayChatItemAction"]["actions"][0]
-                                  for action in actions
-                                  if list(action['replayChatItemAction']["actions"][0].values()
-                                          )[0]['item'].get("liveChatPaidMessageRenderer")
-                                  or list(action['replayChatItemAction']["actions"][0].values()
-                                          )[0]['item'].get("liveChatPaidStickerRenderer")
-                                  ]
-    return None, []
-
-
-def get_offset(item):
-    return int(item['replayChatItemAction']["videoOffsetTimeMsec"])
-
-
-def get_id(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].values())[0].get('id')
-
-
-def get_type(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].keys())[0]
-
-
-_REGEX_YTINIT = re.compile(
-    "window\\[\"ytInitialData\"\\]\\s*=\\s*({.+?});\\s+")
-
-
-def extract(text):
-
-    match = re.findall(_REGEX_YTINIT, str(text))
-    if match:
-        return match[0]
-    return None
--- a/pytchat/tool/mining/patch.py
+++ b/pytchat/tool/mining/patch.py
@@ -1,27 +0,0 @@
-from . import parser
-from . block import Block
-from typing import NamedTuple
-
-class Patch(NamedTuple):
-    """
-    Patch represents chunk of chat data
-    which is fetched by asyncdl.fetch_patch._fetch().
-    """
-    chats : list = []
-    continuation : str = None
-    seektime : float = None
-    first : int = None
-    last : int = None
-
-def fill(block:Block, patch:Patch):
-    if patch.last < block.end:
-        set_patch(block, patch)
-        return
-    block.continuation = None
-
-def set_patch(block:Block, patch:Patch):
-    block.continuation = patch.continuation
-    block.chat_data.extend(patch.chats)
-    block.last = patch.seektime
-    block.seektime = patch.seektime       
-
--- a/pytchat/tool/mining/superchat_miner.py
+++ b/pytchat/tool/mining/superchat_miner.py
@@ -1,72 +0,0 @@
-from . import asyncdl
-from . import parser
-from .. videoinfo import VideoInfo
-from ... import config
-from ... exceptions import InvalidVideoIdException
-logger = config.logger(__name__)
-headers=config.headers
-
-class SuperChatMiner:
-    def __init__(self, video_id, duration, div, callback):
-        if not isinstance(div ,int) or div < 1:
-            raise ValueError('div must be positive integer.')
-        elif div > 10:
-            div = 10
-        if not isinstance(duration ,int) or duration < 1:
-            raise ValueError('duration must be positive integer.')
-        self.video_id = video_id
-        self.duration = duration
-        self.div = div
-        self.callback = callback
-        self.blocks = []
-
-    def _ready_blocks(self):
-        blocks = asyncdl.ready_blocks(
-            self.video_id, self.duration, self.div, self.callback)
-        self.blocks = [block for block in blocks if block is not None]
-        return self  
-
-    def _set_block_end(self):
-        for i in range(len(self.blocks)-1):
-            self.blocks[i].end = self.blocks[i+1].first
-        self.blocks[-1].end = self.duration
-        self.blocks[-1].is_last =True
-        return self
-
-    def _download_blocks(self):
-        asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
-        return self
-
-    def _combine(self):
-        ret = []
-        for block in self.blocks:
-            ret.extend(block.chat_data) 
-        return ret
-
-    def extract(self):
-        return (
-            self._ready_blocks()
-                ._set_block_end()
-                ._download_blocks()
-                ._combine()
-        )
-
-def extract(video_id, div = 1, callback = None, processor = None):
-    duration = 0
-    try:
-        duration = VideoInfo(video_id).get_duration()
-    except InvalidVideoIdException:
-        raise
-    if duration == 0:
-        print("video is live.")
-        return []
-    data = SuperChatMiner(video_id, duration, div, callback).extract()
-    if processor is None:
-        return data
-    return processor.process(
-        [{'video_id':None,'timeout':1,'chatdata' : (action
-        for action in data)}]
-    )
-
-def cancel():
-    asyncdl.cancel()
--- a/pytchat/tool/mining/worker.py
+++ b/pytchat/tool/mining/worker.py
@@ -1,45 +0,0 @@
-from . import parser
-from . block import Block
-from . patch import Patch, fill
-from ... paramgen import arcparam
-INTERVAL = 1
-class ExtractWorker:
-    """
-    ExtractWorker associates a download session with a block.
-
-    When the worker finishes fetching, the block
-    being fetched is splitted and assigned the free worker.
-
-    Parameter
-    ----------
-    fetch : func :
-        extract function of asyncdl
-
-    block : Block :
-        Block object that includes chat_data
-    
-    blocks : list :
-        List of Block(s)
-
-    video_id : str :
-
-    parent_block : Block :
-        the block from which current block is splitted 
-    """
-    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']
-    def __init__(self, fetch, block, blocks, video_id ):
-        self.block:Block = block
-        self.fetch = fetch
-        self.blocks:list = blocks
-        self.video_id:str = video_id
-        self.parent_block:Block = None
-
-    async def run(self, session):
-        while self.block.continuation:
-            patch = await self.fetch(
-                self.block.seektime, session)
-            fill(self.block, patch)
-            self.block.seektime += INTERVAL
-        self.block.done = True    
-
-
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -1,13 +1,16 @@
+import httpx
 import json
 import re
-import httpx
+import time
+from httpx import ConnectError, NetworkError
 from .. import config
-from ..exceptions import InvalidVideoIdException, PatternUnmatchError
+from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
 from ..util.extract_video_id import extract_video_id

+
 headers = config.headers
                         
-pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
+pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")

 item_channel_id = [
    "videoDetails",
@@ -80,20 +83,39 @@ class VideoInfo:

    def __init__(self, video_id):
        self.video_id = extract_video_id(video_id)
+        for _ in range(3):
+            try:
                text = self._get_page_text(self.video_id)
                self._parse(text)
+                break
+            except PatternUnmatchError:
+                time.sleep(2)
+                pass
+        else:
+            raise PatternUnmatchError("Pattern Unmatch")

    def _get_page_text(self, video_id):
        url = f"https://www.youtube.com/embed/{video_id}"
+        err = None
+        for _ in range(3):
+            try:
                resp = httpx.get(url, headers=headers)
                resp.raise_for_status()
+                break
+            except (ConnectError, NetworkError) as e:
+                err = e
+                time.sleep(3)
+        else:
+            raise UnknownConnectionError(str(err))
+
        return resp.text

    def _parse(self, text):
        result = re.search(pattern, text)
        if result is None:
-            raise PatternUnmatchError(text)
-        res = json.loads(result.group(1)[:-1])
+            raise PatternUnmatchError()
+        decoder = json.JSONDecoder()
+        res = decoder.raw_decode(result.group(1)[:-1])[0]
        response = self._get_item(res, item_response)
        if response is None:
            self._check_video_is_private(res.get("args"))
--- a/pytchat/util/init.py
+++ b/pytchat/util/init.py
@@ -1,8 +1,12 @@
+import datetime
 import httpx
 import json
-import datetime
+import os
+import re
 from .. import config

+PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
+

 def extract(url):
    _session = httpx.Client(http2=True)
@@ -16,3 +20,21 @@ def save(data, filename, extention):
    with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
              mode='w', encoding='utf-8') as f:
        f.writelines(data)
+
+
+def checkpath(filepath):
+    splitter = os.path.splitext(os.path.basename(filepath))
+    body = splitter[0]
+    extention = splitter[1]
+    newpath = filepath
+    counter = 1
+    while os.path.exists(newpath):
+        match = re.search(PATTERN, body)
+        if match:
+            counter = int(match[2]) + 1
+            num_with_bracket = f'({str(counter)})'
+            body = f'{match[1]}{num_with_bracket}'
+        else:
+            body = f'{body}({str(counter)})'
+        newpath = os.path.join(os.path.dirname(filepath), body + extention)
+    return newpath
--- a/tests/test_arcparam_mining.py
+++ b/tests/test_arcparam_mining.py
@@ -1,41 +0,0 @@
-from pytchat.tool.mining import parser
-import pytchat.config as config
-import httpx
-import json
-from pytchat.paramgen import arcparam_mining as arcparam
-
-
-def test_arcparam_e(mocker):
-    try:
-        arcparam.getparam("01234567890", -1)
-        assert False
-    except ValueError:
-        assert True
-
-
-def test_arcparam_0(mocker):
-    param = arcparam.getparam("01234567890", 0)
-
-    assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
-
-
-def test_arcparam_1(mocker):
-    param = arcparam.getparam("01234567890", seektime=100000)
-    print(param)
-    assert param == "op2w0wQzGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABWgUQgMLXL2AEcgIIAXgB"
-
-
-def test_arcparam_2(mocker):
-    param = arcparam.getparam("PZz9NB0-Z64", 1)
-    url = f"https://www.youtube.com/live_chat_replay?continuation={param}&playerOffsetMs=1000&pbj=1"
-    resp = httpx.Client(http2=True).get(url, headers=config.headers)
-    jsn = json.loads(resp.text)
-    _, chatdata = parser.parse(jsn[1])
-    test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatPaidMessageRenderer"]["id"]
-    print(test_id)
-    assert test_id == "ChwKGkNKSGE0YnFJeWVBQ0ZWcUF3Z0VkdGIwRm9R"
-
-
-def test_arcparam_3(mocker):
-    param = arcparam.getparam("01234567890")
-    assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
--- a/tests/test_videoinfo.py
+++ b/tests/test_videoinfo.py
@@ -1,7 +1,6 @@
 from json.decoder import JSONDecodeError
 from pytchat.tool.videoinfo import VideoInfo
 from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
-from pytchat import util


 def _open_file(path):
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
 def test_live_page(mocker):
    _set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
    info = VideoInfo('__test_id__')
-    '''live page :duration = 0'''        
+    '''live page: duration==0'''
    assert info.get_duration() == 0
    assert info.video_id == '__test_id__'
    assert info.get_channel_name() == 'BGM channel'
@@ -88,3 +87,15 @@ def test_pattern_unmatch(mocker):
        assert False
    except PatternUnmatchError:
        assert True
+
+
+def test_extradata_handling(mocker):
+    '''Test case the extracted data are JSON lines.'''
+    _set_test_data(
+        'tests/testdata/videoinfo/extradata_page.txt', mocker)
+    try:
+        _ = VideoInfo('__test_id__')
+        assert True
+    except JSONDecodeError as e:
+        print(e.doc)
+        assert False
--- a/tests/testdata/videoinfo/extradata_page.txt
+++ b/tests/testdata/videoinfo/extradata_page.txt
Author	SHA1	Message	Date
taizan_hokuto	71650c39f7	Merge branch 'hotfix/fix'	2020-10-03 22:42:48 +09:00
taizan_hokuto	488445c73b	Increment version	2020-10-03 22:41:53 +09:00
taizan_hokuto	075e811efe	Delete unnecessary code	2020-10-03 22:41:12 +09:00
taizan_hokuto	58d9bf7fdb	Merge branch 'hotfix/pattern'	2020-10-03 22:35:46 +09:00
taizan_hokuto	b3e6275de7	Increment version	2020-10-03 22:35:22 +09:00
taizan_hokuto	748778f545	Fix pattern matching	2020-10-03 22:04:09 +09:00
taizan-hokuto	e29b3b8377	Merge branch 'hotfix/network'	2020-09-14 00:40:40 +09:00
taizan-hokuto	0859ed5fb1	Increment version	2020-09-14 00:29:21 +09:00
taizan-hokuto	a80d5ba080	Fix handling network error	2020-09-14 00:28:41 +09:00
taizan-hokuto	b7e6043a71	Merge branch 'hotfix/memory'	2020-09-12 02:12:46 +09:00
taizan-hokuto	820ba35013	Increment version	2020-09-12 02:02:07 +09:00
taizan-hokuto	ecd2d130bf	Clear set each time the extraction changes	2020-09-12 01:57:55 +09:00
taizan-hokuto	f77a2c889b	Merge branch 'hotfix/not_quit'	2020-09-12 00:57:48 +09:00
taizan-hokuto	47d5ab288f	Increment version	2020-09-12 00:49:37 +09:00
taizan-hokuto	5f53fd24dd	Format	2020-09-12 00:48:40 +09:00
taizan-hokuto	11a9d0e2d7	Fix a problem with extraction not completing	2020-09-12 00:42:30 +09:00
taizan-hokuto	480c9e15b8	Merge branch 'hotfix/continue_error'	2020-09-11 00:21:07 +09:00
taizan-hokuto	35aa7636f6	Increment version	2020-09-11 00:20:24 +09:00
taizan-hokuto	8fee67c2d4	Fix handling video info error	2020-09-11 00:18:09 +09:00
taizan-hokuto	d3f1643a40	Merge branch 'release/v0.2.1'	2020-09-09 22:23:01 +09:00
taizan-hokuto	eb29f27493	Increment version	2020-09-09 22:22:31 +09:00
taizan-hokuto	8adf75ab83	Merge branch 'feature/pbar' into develop	2020-09-09 22:20:36 +09:00
taizan-hokuto	2e05803d75	Remove unnecessary option	2020-09-09 22:20:09 +09:00
taizan-hokuto	f16c0ee73a	Fix progress bar line feed and remove pbar option	2020-09-09 22:19:10 +09:00
taizan-hokuto	a338f2b782	Merge tag 'v0.2.0' into develop v0.2.0	2020-09-07 23:35:45 +09:00
taizan-hokuto	864ccddfd7	Merge branch 'release/v0.2.0'	2020-09-07 23:35:44 +09:00
taizan-hokuto	339df69e36	Increment version	2020-09-07 23:35:14 +09:00
taizan-hokuto	76a5b0cd18	Merge branch 'feature/new_item' into develop	2020-09-07 23:34:16 +09:00
taizan-hokuto	be0ab2431b	Delete test for unuse module	2020-09-07 23:33:26 +09:00
taizan-hokuto	2edb60c592	Delete unuse modules	2020-09-07 23:31:32 +09:00
taizan-hokuto	2c6c3a1ca3	Delete old progress bar	2020-09-07 23:30:49 +09:00
taizan-hokuto	4be540793d	Delete unnecessary blank lines	2020-09-07 23:30:30 +09:00
taizan-hokuto	08b86fe596	Make it possible to switch progress bar	2020-09-07 23:29:48 +09:00
taizan-hokuto	157f3b9952	Fix handling when missing id and type	2020-09-07 23:28:03 +09:00
taizan-hokuto	8f3ca2662a	Merge tag 'pbar' into develop v0.1.9	2020-09-06 18:58:34 +09:00
taizan-hokuto	c4b015861c	Merge branch 'hotfix/pbar'	2020-09-06 18:58:33 +09:00
taizan-hokuto	3aa413d59e	Increment version	2020-09-06 18:54:10 +09:00
taizan-hokuto	03ba285a16	Fix callback handling	2020-09-06 18:53:35 +09:00
taizan-hokuto	5fe0ee5aa8	Merge tag 'v0.1.8' into develop v0.1.8	2020-09-06 18:27:58 +09:00
taizan-hokuto	4e829a25d4	Merge branch 'release/v0.1.8'	2020-09-06 18:27:57 +09:00
taizan-hokuto	15132a9bb8	Increment version	2020-09-06 18:27:08 +09:00
taizan-hokuto	64ace9dad6	Update progress bar	2020-09-06 18:25:16 +09:00
taizan-hokuto	9a2e96d3a0	Merge tag 'extract_vid' into develop v0.1.7	2020-09-04 01:55:42 +09:00
taizan-hokuto	a3695a59b8	Merge branch 'hotfix/extract_vid'	2020-09-04 01:55:41 +09:00
taizan-hokuto	bc8655ed62	Increment version	2020-09-04 01:53:14 +09:00
taizan-hokuto	3bdc465740	Devide exception handling	2020-09-04 01:52:53 +09:00
taizan-hokuto	235d6b7212	Fix extract video info	2020-09-04 01:46:10 +09:00
taizan-hokuto	9f0754da57	Merge tag 'http2' into develop v0.1.6	2020-09-03 21:27:48 +09:00
taizan-hokuto	0e301f48a8	Merge tag 'v0.1.5' into develop v0.1.5	2020-09-03 20:16:56 +09:00