From 971e4bdf39d0389639ee6fe455f158d438a7a192 Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Sun, 12 Jul 2020 23:23:05 +0900
Subject: [PATCH 1/5] Add finalize function to processor
---
pytchat/core_async/livechat.py | 3 ++-
pytchat/core_multithread/livechat.py | 1 +
pytchat/processors/chat_processor.py | 7 +++++++
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py
index 715064e..2cc3ff8 100644
--- a/pytchat/core_async/livechat.py
+++ b/pytchat/core_async/livechat.py
@@ -325,7 +325,8 @@ class LiveChatAsync:
self._pauser.put_nowait(None)
self._is_alive = False
self._buffer.put_nowait({})
-
+ self.processor.finalize()
+
def _task_finished(self):
'''
Listenerを終了する。
diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py
index e06bc8a..7f99c55 100644
--- a/pytchat/core_multithread/livechat.py
+++ b/pytchat/core_multithread/livechat.py
@@ -316,6 +316,7 @@ class LiveChat:
self._is_alive = False
self._buffer.put({})
self._event.set()
+ self.processor.finalize()
def _task_finished(self):
'''
diff --git a/pytchat/processors/chat_processor.py b/pytchat/processors/chat_processor.py
index 98d2227..3af82e7 100644
--- a/pytchat/processors/chat_processor.py
+++ b/pytchat/processors/chat_processor.py
@@ -21,3 +21,10 @@ class ChatProcessor:
}
'''
pass
+
+ def finalize(self, *args, **kwargs):
+ '''
+ Interface for finalizing the process.
+ Called when chat fetching has finished.
+ '''
+ pass
From 133a8afb278a6aee48472887f6bbdbed7aae59d2 Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Sun, 12 Jul 2020 23:24:43 +0900
Subject: [PATCH 2/5] Make it possible to embed custom emojis in HTML
---
pytchat/processors/default/renderer/base.py | 7 +-
pytchat/processors/html_archiver.py | 126 +++++++++++++-------
pytchat/tool/extract/extractor.py | 4 +-
3 files changed, 92 insertions(+), 45 deletions(-)
diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py
index 1e42619..64fbecc 100644
--- a/pytchat/processors/default/renderer/base.py
+++ b/pytchat/processors/default/renderer/base.py
@@ -52,8 +52,11 @@ class BaseRenderer:
if r:
if r.get('emoji'):
message += r['emoji'].get('shortcuts', [''])[0]
- message_ex.append(
- r['emoji']['image']['thumbnails'][1].get('url'))
+ message_ex.append({
+ 'id': r['emoji'].get('emojiId').split('/')[-1],
+ 'txt': r['emoji'].get('shortcuts', [''])[0],
+ 'url': r['emoji']['image']['thumbnails'][0].get('url')
+ })
else:
message += r.get('text', '')
message_ex.append(r.get('text', ''))
diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py
index 397d31e..dba8d22 100644
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -1,31 +1,56 @@
import os
import re
+import requests
+from base64 import standard_b64encode
from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
+
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
+
fmt_headers = ['datetime', 'elapsed', 'authorName',
'message', 'superchat', 'type', 'authorChannel']
HEADER_HTML = '''
-
+
+
+'''
+
+TABLE_CSS = '''
+table.css {
+ border-collapse: collapse;
+}
+
+table.css thead{
+ border-collapse: collapse;
+ border: 1px solid #000
+}
+
+table.css tr td{
+ padding: 0.3em;
+ border: 1px solid #000
+}
+
+table.css th{
+ padding: 0.3em;
+ border: 1px solid #000
+}
'''
class HTMLArchiver(ChatProcessor):
'''
- HtmlArchiver saves chat data as HTML table format.
+ HTMLArchiver saves chat data as HTML table format.
'''
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
- with open(self.save_path, mode='a', encoding='utf-8') as f:
- f.write(HEADER_HTML)
- f.write('')
- f.writelines(self._parse_html_header(fmt_headers))
self.processor = DefaultProcessor()
+ self.emoji_table = {}
+ self.header = [HEADER_HTML]
+ self.body = ['\n', '\n', self._parse_table_header(fmt_headers)]
def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath))
@@ -56,42 +81,59 @@ class HTMLArchiver(ChatProcessor):
"""
if chat_components is None or len(chat_components) == 0:
return
-
- with open(self.save_path, mode='a', encoding='utf-8') as f:
- chats = self.processor.process(chat_components).items
- for c in chats:
- f.writelines(
- self._parse_html_line([
- c.datetime,
- c.elapsedTime,
- c.author.name,
- c.message,
- c.amountString,
- c.author.type,
- c.author.channelId]
- )
- )
- '''
- #Palliative treatment#
- Comment out below line to prevent the table
- display from collapsing.
- '''
- # f.write('
')
+ # chats = self.processor.process(chat_components).items
+ self.body.extend(
+ (self._parse_html_line((
+ c.datetime,
+ c.elapsedTime,
+ c.author.name,
+ self._parse_message(c.messageEx),
+ c.amountString,
+ c.author.type,
+ c.author.channelId)
+ ) for c in self.processor.process(chat_components).items)
+ )
def _parse_html_line(self, raw_line):
- html = ''
- html += ' '
- for cell in raw_line:
- html += '| ' + cell + ' | '
- html += '
\n'
- return html
+ return ''.join(('',
+ ''.join(''.join(('| ', cell, ' | ')) for cell in raw_line),
+ '
\n'))
- def _parse_html_header(self, raw_line):
- html = ''
- html += '\n'
- html += ' '
- for cell in raw_line:
- html += '| ' + cell + ' | '
- html += '
\n'
- html += '\n'
- return html
+ def _parse_table_header(self, raw_line):
+ return ''.join(('',
+ ''.join(''.join(('| ', cell, ' | ')) for cell in raw_line),
+ '
\n'))
+
+ def _parse_message(self, message_items: list) -> str:
+ return ''.join(''.join((''))
+ if type(item) is dict else item
+ for item in message_items)
+
+ def _encode_img(self, url):
+ resp = requests.get(url)
+ return standard_b64encode(resp.content).decode()
+
+ def _set_emoji_table(self, item: dict):
+ emoji_id = item['id']
+ if emoji_id not in self.emoji_table:
+ self.emoji_table.setdefault(emoji_id, self._encode_img(item['url']))
+ return emoji_id
+
+ def _stylecode(self, name, code, width, height):
+ return ''.join((".", name, " { display: inline-block; background-image: url(data:image/png;base64,",
+ code, "); background-repeat: no-repeat; width: ",
+ str(width), "; height: ", str(height), ";}"))
+
+ def _create_styles(self):
+ return '\n'.join(('\n'))
+
+ def finalize(self):
+ self.header.extend([self._create_styles(), '\n'])
+ self.body.extend(['
\n'])
+ with open(self.save_path, mode='a', encoding='utf-8') as f:
+ f.writelines(self.header)
+ f.writelines(self.body)
diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py
index 1110e14..2b421af 100644
--- a/pytchat/tool/extract/extractor.py
+++ b/pytchat/tool/extract/extractor.py
@@ -83,11 +83,13 @@ class Extractor:
data = self._execute_extract_operations()
if self.processor is None:
return data
- return self.processor.process(
+ ret = self.processor.process(
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
+ self.processor.finalize()
+ return ret
def cancel(self):
asyncdl.cancel()
From 5dfd883fc953fd6dd863ab04f9ab91264770d7f8 Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Sun, 12 Jul 2020 23:47:02 +0900
Subject: [PATCH 3/5] Remove unnecessary line
---
pytchat/processors/html_archiver.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py
index dba8d22..037b464 100644
--- a/pytchat/processors/html_archiver.py
+++ b/pytchat/processors/html_archiver.py
@@ -43,12 +43,11 @@ class HTMLArchiver(ChatProcessor):
'''
HTMLArchiver saves chat data as HTML table format.
'''
-
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor()
- self.emoji_table = {}
+ self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML]
self.body = ['\n', '\n', self._parse_table_header(fmt_headers)]
@@ -81,7 +80,6 @@ class HTMLArchiver(ChatProcessor):
"""
if chat_components is None or len(chat_components) == 0:
return
- # chats = self.processor.process(chat_components).items
self.body.extend(
(self._parse_html_line((
c.datetime,
From b7ff2b6537ea7b2a7bde0655658ad2d3fc32ca0a Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Mon, 13 Jul 2020 00:59:20 +0900
Subject: [PATCH 4/5] Restore logging settings
---
pytchat/config/__init__.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py
index 81d91cf..e374bc5 100644
--- a/pytchat/config/__init__.py
+++ b/pytchat/config/__init__.py
@@ -1,9 +1,9 @@
+import logging
from . import mylogger
-
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
-def logger(module_name: str, loglevel=None):
+def logger(module_name: str, loglevel=logging.DEBUG):
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
return module_logger
From 366d75c2bbdf67dd022ad89e7567c94af265494f Mon Sep 17 00:00:00 2001
From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Mon, 13 Jul 2020 01:44:49 +0900
Subject: [PATCH 5/5] Update README
---
README.md | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 694ad8a..50a26fe 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ pip install pytchat
### CLI
One-liner command.
-Save chat data to html.
+Save chat data to HTML, with embedded custom emojis.
```bash
$ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/"
@@ -148,6 +148,20 @@ def main():
if __name__ == '__main__':
main()
```
+### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab-separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
+```python
+from pytchat import HTMLArchiver, Extractor
+
+video_id = "*******"
+ex = Extractor(
+ video_id,
+ div=10,
+ processor=HTMLArchiver("c:/test.html")
+)
+
+ex.extract()
+print("finished.")
+```
## Structure of Default Processor
Each item can be got with `items` function.
@@ -175,7 +189,7 @@ Each item can be got with `items` function.
| messageEx |
str |
- list of message texts and emoji URLs. |
+ list of message texts and emoji dicts (keys: id, txt, url). |
| timestamp |