Merge branch 'feature/emoji_embedding' into develop

2020-07-13 01:45:07 +09:00
parent a7379fd93f 366d75c2bb
commit 75a31bd245
8 changed files with 119 additions and 51 deletions
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ pip install pytchat
 ### CLI
 One-liner command.
-Save chat data to html.
+Save chat data to html, with embedded custom emojis.
 ```bash
 $ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/"
@@ -148,6 +148,20 @@ def main():
 if __name__ == '__main__':
  main()
 ```
 ### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
 ```python
 from pytchat import HTMLArchiver, Extractor
 video_id = "*******"
 ex = Extractor(
    video_id,
    div=10,
    processor=HTMLArchiver("c:/test.html")
 )
 ex.extract()
 print("finished.")
 ```
 ## Structure of Default Processor
 Each item can be retrieved with the `items` function.
@@ -175,7 +189,7 @@ Each item can be got with `items` function.
  <tr>
    <td>messageEx</td>
    <td>str</td>
-    <td>list of message texts and emoji URLs.</td>
+    <td>list of message texts and emoji dicts (id, txt, url).</td>
  </tr>
  <tr>
    <td>timestamp</td>
--- a/pytchat/config/init.py
+++ b/pytchat/config/init.py
@@ -1,9 +1,9 @@
 import logging
 from . import mylogger
 headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
-def logger(module_name: str, loglevel=None):
+def logger(module_name: str, loglevel=logging.DEBUG):
    module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
    return module_logger
--- a/pytchat/core_async/livechat.py
+++ b/pytchat/core_async/livechat.py
@@ -325,7 +325,8 @@ class LiveChatAsync:
            self._pauser.put_nowait(None)
        self._is_alive = False
        self._buffer.put_nowait({})
-
+        self.processor.finalize()
    def _task_finished(self):
        '''
        Listenerを終了する。
--- a/pytchat/core_multithread/livechat.py
+++ b/pytchat/core_multithread/livechat.py
@@ -316,6 +316,7 @@ class LiveChat:
        self._is_alive = False
        self._buffer.put({})
        self._event.set()
        self.processor.finalize()
    def _task_finished(self):
        '''
--- a/pytchat/processors/chat_processor.py
+++ b/pytchat/processors/chat_processor.py
@@ -21,3 +21,10 @@ class ChatProcessor:
            }
        '''
        pass
    def finalize(self, *args, **kwargs):
        '''
        Hook for finishing up the processor's work.
        It is called once chat fetching has finished.

        This base implementation accepts any arguments, does nothing
        and returns None.
        '''
        return None
--- a/pytchat/processors/default/renderer/base.py
+++ b/pytchat/processors/default/renderer/base.py
@@ -52,8 +52,11 @@ class BaseRenderer:
                    if r:
                        if r.get('emoji'):
                            message += r['emoji'].get('shortcuts', [''])[0]
-                            message_ex.append(
+                            message_ex.append({
-                                r['emoji']['image']['thumbnails'][1].get('url'))
+                                'id': r['emoji'].get('emojiId').split('/')[-1],
                                'txt': r['emoji'].get('shortcuts', [''])[0],
                                'url': r['emoji']['image']['thumbnails'][0].get('url')
                            })
                        else:
                            message += r.get('text', '')
                            message_ex.append(r.get('text', ''))
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -1,31 +1,55 @@
 import os
 import re
 import requests
 from base64 import standard_b64encode
 from .chat_processor import ChatProcessor
 from .default.processor import DefaultProcessor
 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
 fmt_headers = ['datetime', 'elapsed', 'authorName',
               'message', 'superchat', 'type', 'authorChannel']
 HEADER_HTML = '''
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
 <html>
 <head>
 '''
 TABLE_CSS = '''
 table.css {
 border-collapse: collapse;
 }
 table.css thead{
 border-collapse: collapse;
 border: 1px solid #000
 }
 table.css tr td{
 padding: 0.3em;
 border: 1px solid #000
 }
 table.css th{
 padding: 0.3em;
 border: 1px solid #000
 }
 '''
 class HTMLArchiver(ChatProcessor):
    '''
-    HtmlArchiver saves chat data as HTML table format.
+    HTMLArchiver saves chat data as HTML table format.
    '''
    def __init__(self, save_path):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        with open(self.save_path, mode='a', encoding='utf-8') as f:
            f.write(HEADER_HTML)
            f.write('<table border="1" style="border-collapse: collapse">')
            f.writelines(self._parse_html_header(fmt_headers))
        self.processor = DefaultProcessor()
        self.emoji_table = {}  # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
        self.header = [HEADER_HTML]
        self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
    def _checkpath(self, filepath):
        splitter = os.path.splitext(os.path.basename(filepath))
@@ -56,42 +80,58 @@ class HTMLArchiver(ChatProcessor):
        """
        if chat_components is None or len(chat_components) == 0:
            return
-
+        self.body.extend(
-        with open(self.save_path, mode='a', encoding='utf-8') as f:
+            (self._parse_html_line((
-            chats = self.processor.process(chat_components).items
+                c.datetime,
-            for c in chats:
+                c.elapsedTime,
-                f.writelines(
+                c.author.name,
-                    self._parse_html_line([
+                self._parse_message(c.messageEx),
-                        c.datetime,
+                c.amountString,
-                        c.elapsedTime,
+                c.author.type,
-                        c.author.name,
+                c.author.channelId)
-                        c.message,
+            ) for c in self.processor.process(chat_components).items)
-                        c.amountString,
+        )
                        c.author.type,
                        c.author.channelId]
                    )
                )
            '''
            #Palliative treatment#
            Comment out below line to prevent the table
            display from collapsing.
            '''
            # f.write('</table>')
    def _parse_html_line(self, raw_line):
-        html = ''
+        return ''.join(('<tr>',
-        html += ' <tr>'
+                        ''.join(''.join(('<td>', cell, '</td>')) for cell in raw_line),
-        for cell in raw_line:
+                        '</tr>\n'))
            html += '<td>' + cell + '</td>'
        html += '</tr>\n'
        return html
-    def _parse_html_header(self, raw_line):
+    def _parse_table_header(self, raw_line):
-        html = ''
+        return ''.join(('<thead><tr>',
-        html += '<thead>\n'
+                        ''.join(''.join(('<th>', cell, '</th>')) for cell in raw_line),
-        html += ' <tr>'
+                        '</tr></thead>\n'))
-        for cell in raw_line:
+        
-            html += '<th>' + cell + '</th>'
+    def _parse_message(self, message_items: list) -> str:
-        html += '</tr>\n'
+        return ''.join(''.join(('<span class="', self._set_emoji_table(item), '" title="', item['txt'], '"></span>'))
-        html += '</thead>\n'
+                       if type(item) is dict else item
-        return html
+                       for item in message_items)
    def _encode_img(self, url):
        '''
        Download the resource at `url` and return its content
        as base64-encoded text.

        Parameter
        ----------
        url : str
            Location of the image to download.

        Return
        ----------
        str
            Standard base64 encoding of the response body.

        Raises
        ----------
        requests.exceptions.RequestException
            When the request fails, including when the server does not
            answer within the timeout.
        '''
        # Without a timeout, requests waits forever on a stalled server,
        # which would block the whole archiving run on a single emoji image.
        resp = requests.get(url, timeout=10)
        return standard_b64encode(resp.content).decode()
    def _set_emoji_table(self, item: dict):
        emoji_id = item['id']
        if emoji_id not in self.emoji_table:
            self.emoji_table.setdefault(emoji_id, self._encode_img(item['url']))
        return emoji_id
    def _stylecode(self, name, code, width, height):
        return ''.join((".", name, " { display: inline-block; background-image: url(data:image/png;base64,",
                        code, "); background-repeat: no-repeat; width: ",
                        str(width), "; height: ", str(height), ";}"))
    def _create_styles(self):
        '''
        Return the whole <style> element as one string.

        It contains TABLE_CSS followed by one rule per entry of
        `self.emoji_table`, each built by `_stylecode` with a
        width and height of 24.
        '''
        emoji_rules = [self._stylecode(emoji_id, encoded, 24, 24)
                       for emoji_id, encoded in self.emoji_table.items()]
        sections = ['<style type="text/css">',
                    TABLE_CSS,
                    '\n'.join(emoji_rules),
                    '</style>\n']
        return '\n'.join(sections)
    def finalize(self):
        '''
        Complete the buffered document and write it to `self.save_path`.

        The style element and the closing head tag are added to
        `self.header`, the closing table and body tags to `self.body`.
        The header is then written before the body, appending to the file.
        '''
        self.header += [self._create_styles(), '</head>\n']
        self.body.append('</table>\n</body>')
        with open(self.save_path, mode='a', encoding='utf-8') as out:
            for section in (self.header, self.body):
                out.writelines(section)
--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -83,11 +83,13 @@ class Extractor:
        data = self._execute_extract_operations()
        if self.processor is None:
            return data
-        return self.processor.process(
+        ret = self.processor.process(
            [{'video_id': None,
              'timeout': 1,
              'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
        )
        self.processor.finalize()
        return ret
    def cancel(self):
        asyncdl.cancel()