Merge branch 'release/v0.1.8'

Increment version
Update progress bar
2020-09-06 18:27:57 +09:00 · 2020-09-06 18:27:08 +09:00 · 2020-09-06 18:25:16 +09:00 · 2020-09-04 01:55:42 +09:00 · 2020-09-04 01:55:41 +09:00 · 2020-09-04 01:53:14 +09:00
7 changed files with 91 additions and 35 deletions
--- a/pytchat/__init__.py
+++ b/pytchat/__init__.py
@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__    = 'Copyright (C) 2019 taizan-hokuto'
-__version__      = '0.1.6'
+__version__      = '0.1.8'
 __license__      = 'MIT'
 __author__       = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
--- a/pytchat/cli/__init__.py
+++ b/pytchat/cli/__init__.py
@@ -62,23 +62,36 @@ def main():

            print(f" output path: {path.resolve()}")
            duration = info.get_duration()
-            pbar = ProgressBar(duration)
-            ex = Extractor(video_id,
-                    processor=HTMLArchiver(Arguments().output + video_id + '.html'),
+            pbar = ProgressBar(total=(duration * 1000) / 0.99, status="Extracting")
+            ex = Extractor(video_id,                    
                    callback=pbar._disp,
                    div=10)
            signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
-            ex.extract()
+            data = ex.extract()
+            if data == []:
+                return False
+            pbar.reset("#", "=", total=len(data), status="Rendering  ")
+            processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
+            processor.process(
+                [{'video_id': None,
+                'timeout': 1,
+                'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
+            )
+            processor.finalize()
+            pbar.reset('#', '#', status='Completed   ')
            pbar.close()
+            print()
            if pbar.is_cancelled():
                print("\nThe extraction process has been discontinued.\n")
-                return
-            print("\nThe extraction process has been completed.\n")
+                return False
+            return True
+
        except InvalidVideoIdException:
            print("Invalid Video ID or URL:", video_id)
-        except (TypeError, NoContents) as e:
-
+        except TypeError as e:
            print(e.with_traceback())
+        except NoContents as e:
+            print(e)
        except FileNotFoundError:
            print("The specified directory does not exist.:{}".format(Arguments().output))
        except JSONDecodeError as e:
--- a/pytchat/cli/progressbar.py
+++ b/pytchat/cli/progressbar.py
@@ -6,33 +6,46 @@ https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
 '''
 import sys

+ROT = ['\u25F4', '\u25F5', '\u25F6', '\u25F7']
+

 class ProgressBar:
-    def __init__(self, duration):
-        self._duration = duration
-        self._count = 0
+    def __init__(self, total, status):
        self._bar_len = 60
        self._cancelled = False
+        self.reset(total=total, status=status)
+        self._blinker = 0
+
+    def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
+        self._symbol_done = symbol_done
+        self._symbol_space = symbol_space
+        self._total = total
+        self._status = status
+        self._count = 0

    def _disp(self, _, fetched):
-        self._progress(fetched / 1000, self._duration)
+        self._progress(fetched, self._total)

-    def _progress(self, fillin, total, status=''):
+    def _progress(self, fillin, total):
        if total == 0 or self._cancelled:
            return
        self._count += fillin
        filled_len = int(round(self._bar_len * self._count / float(total)))
        percents = round(100.0 * self._count / float(total), 1)
+        if percents > 100:
+            percents = 100.0
        if filled_len > self._bar_len:
            filled_len = self._bar_len
-            percents = 100
-        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
-        sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
+            
+        bar = self._symbol_done * filled_len + \
+              self._symbol_space * (self._bar_len - filled_len)
+        sys.stdout.write(' [%s] %s%s ...%s %s \r' % (bar, percents, '%', self._status, ROT[self._blinker % 4]))
        sys.stdout.flush()
+        self._blinker += 1

    def close(self):
        if not self._cancelled:
-            self._progress(self._duration, self._duration)
+            self._progress(self._total, self._total)

    def cancel(self):
        self._cancelled = True
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -43,20 +43,21 @@ class HTMLArchiver(ChatProcessor):
    '''
    HTMLArchiver saves chat data as HTML table format.
    '''
-    def __init__(self, save_path):
+    def __init__(self, save_path, callback):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        self.processor = DefaultProcessor()
        self.emoji_table = {}  # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
        self.header = [HEADER_HTML]
        self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
+        self.callback = callback

    def _checkpath(self, filepath):
        splitter = os.path.splitext(os.path.basename(filepath))
        body = splitter[0]
        extention = splitter[1]
        newpath = filepath
-        counter = 0
+        counter = 1
        while os.path.exists(newpath):
            match = re.search(PATTERN, body)
            if match:
@@ -80,17 +81,19 @@ class HTMLArchiver(ChatProcessor):
        """
        if chat_components is None or len(chat_components) == 0:
            return
-        self.body.extend(
-            (self._parse_html_line((
-                c.datetime,
-                c.elapsedTime,
-                c.author.name,
-                self._parse_message(c.messageEx),
-                c.amountString,
-                c.author.type,
-                c.author.channelId)
-            ) for c in self.processor.process(chat_components).items)
-        )
+        for c in self.processor.process(chat_components).items:
+            self.body.extend(
+                self._parse_html_line((
+                    c.datetime,
+                    c.elapsedTime,
+                    c.author.name,
+                    self._parse_message(c.messageEx),
+                    c.amountString,
+                    c.author.type,
+                    c.author.channelId)
+                )
+            )
+            self.callback(None, 1)

    def _parse_html_line(self, raw_line):
        return ''.join(('<tr>',
@@ -131,7 +134,7 @@ class HTMLArchiver(ChatProcessor):
    
    def finalize(self):
        self.header.extend([self._create_styles(), '</head>\n'])
-        self.body.extend(['</table>\n</body>'])
+        self.body.extend(['</table>\n</body>\n</html>'])
        with open(self.save_path, mode='a', encoding='utf-8') as f:
            f.writelines(self.header)
            f.writelines(self.body)
--- a/pytchat/tool/videoinfo.py
+++ b/pytchat/tool/videoinfo.py
@@ -93,7 +93,8 @@ class VideoInfo:
        result = re.search(pattern, text)
        if result is None:
            raise PatternUnmatchError(text)
-        res = json.loads(result.group(1)[:-1])
+        decoder = json.JSONDecoder()
+        res = decoder.raw_decode(result.group(1)[:-1])[0]
        response = self._get_item(res, item_response)
        if response is None:
            self._check_video_is_private(res.get("args"))
--- a/tests/test_videoinfo.py
+++ b/tests/test_videoinfo.py
@@ -1,7 +1,6 @@
 from json.decoder import JSONDecodeError
 from pytchat.tool.videoinfo import VideoInfo
 from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
-from pytchat import util


 def _open_file(path):
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
 def test_live_page(mocker):
    _set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
    info = VideoInfo('__test_id__')
-    '''live page :duration = 0'''        
+    '''live page: duration==0'''
    assert info.get_duration() == 0
    assert info.video_id == '__test_id__'
    assert info.get_channel_name() == 'BGM channel'
@@ -88,3 +87,15 @@ def test_pattern_unmatch(mocker):
        assert False
    except PatternUnmatchError:
        assert True
+
+
+def test_extradata_handling(mocker):
+    '''Test case the extracted data are JSON lines.'''
+    _set_test_data(
+        'tests/testdata/videoinfo/extradata_page.txt', mocker)
+    try:
+        _ = VideoInfo('__test_id__')
+        assert True
+    except JSONDecodeError as e:
+        print(e.doc)
+        assert False
--- a/tests/testdata/videoinfo/extradata_page.txt
+++ b/tests/testdata/videoinfo/extradata_page.txt
Author	SHA1	Message	Date
taizan-hokuto	4e829a25d4	Merge branch 'release/v0.1.8'	2020-09-06 18:27:57 +09:00
taizan-hokuto	15132a9bb8	Increment version	2020-09-06 18:27:08 +09:00
taizan-hokuto	64ace9dad6	Update progress bar	2020-09-06 18:25:16 +09:00
taizan-hokuto	9a2e96d3a0	Merge tag 'extract_vid' into develop v0.1.7	2020-09-04 01:55:42 +09:00
taizan-hokuto	a3695a59b8	Merge branch 'hotfix/extract_vid'	2020-09-04 01:55:41 +09:00
taizan-hokuto	bc8655ed62	Increment version	2020-09-04 01:53:14 +09:00
taizan-hokuto	3bdc465740	Devide exception handling	2020-09-04 01:52:53 +09:00
taizan-hokuto	235d6b7212	Fix extract video info	2020-09-04 01:46:10 +09:00
taizan-hokuto	9f0754da57	Merge tag 'http2' into develop v0.1.6	2020-09-03 21:27:48 +09:00
taizan-hokuto	0e301f48a8	Merge tag 'v0.1.5' into develop v0.1.5	2020-09-03 20:16:56 +09:00