From 971e4bdf39d0389639ee6fe455f158d438a7a192 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sun, 12 Jul 2020 23:23:05 +0900 Subject: [PATCH 1/5] Add finalize function to processor --- pytchat/core_async/livechat.py | 3 ++- pytchat/core_multithread/livechat.py | 1 + pytchat/processors/chat_processor.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 715064e..2cc3ff8 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -325,7 +325,8 @@ class LiveChatAsync: self._pauser.put_nowait(None) self._is_alive = False self._buffer.put_nowait({}) - + self.processor.finalize() + def _task_finished(self): ''' Listenerを終了する。 diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index e06bc8a..7f99c55 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -316,6 +316,7 @@ class LiveChat: self._is_alive = False self._buffer.put({}) self._event.set() + self.processor.finalize() def _task_finished(self): ''' diff --git a/pytchat/processors/chat_processor.py b/pytchat/processors/chat_processor.py index 98d2227..3af82e7 100644 --- a/pytchat/processors/chat_processor.py +++ b/pytchat/processors/chat_processor.py @@ -21,3 +21,10 @@ class ChatProcessor: } ''' pass + + def finalize(self, *args, **kwargs): + ''' + Interface for finalizing the process. + Called when chat fetching finished. + ''' + pass From 133a8afb278a6aee48472887f6bbdbed7aae59d2 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sun, 12 Jul 2020 23:24:43 +0900 Subject: [PATCH 2/5] Make it possible to embed custom emojis in HTML --- pytchat/processors/default/renderer/base.py | 7 +- pytchat/processors/html_archiver.py | 126 +++++++++++++------- pytchat/tool/extract/extractor.py | 4 +- 3 files changed, 92 insertions(+), 45 deletions(-) diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py index 1e42619..64fbecc 100644 --- a/pytchat/processors/default/renderer/base.py +++ b/pytchat/processors/default/renderer/base.py @@ -52,8 +52,11 @@ class BaseRenderer: if r: if r.get('emoji'): message += r['emoji'].get('shortcuts', [''])[0] - message_ex.append( - r['emoji']['image']['thumbnails'][1].get('url')) + message_ex.append({ + 'id': r['emoji'].get('emojiId').split('/')[-1], + 'txt': r['emoji'].get('shortcuts', [''])[0], + 'url': r['emoji']['image']['thumbnails'][0].get('url') + }) else: message += r.get('text', '') message_ex.append(r.get('text', '')) diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index 397d31e..dba8d22 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -1,31 +1,56 @@ import os import re +import requests +from base64 import standard_b64encode from .chat_processor import ChatProcessor from .default.processor import DefaultProcessor + PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") + fmt_headers = ['datetime', 'elapsed', 'authorName', 'message', 'superchat', 'type', 'authorChannel'] HEADER_HTML = ''' - + + +''' + +TABLE_CSS = ''' +table.css { + border-collapse: collapse; +} + +table.css thead{ + border-collapse: collapse; + border: 1px solid #000 +} + +table.css tr td{ + padding: 0.3em; + border: 1px solid #000 +} + +table.css th{ + padding: 0.3em; + border: 1px solid #000 +} ''' class HTMLArchiver(ChatProcessor): ''' - HtmlArchiver saves chat data as HTML table format. + HTMLArchiver saves chat data as HTML table format. ''' def __init__(self, save_path): super().__init__() self.save_path = self._checkpath(save_path) - with open(self.save_path, mode='a', encoding='utf-8') as f: - f.write(HEADER_HTML) - f.write('') - f.writelines(self._parse_html_header(fmt_headers)) self.processor = DefaultProcessor() + self.emoji_table = {} + self.header = [HEADER_HTML] + self.body = ['\n', '
\n', self._parse_table_header(fmt_headers)] def _checkpath(self, filepath): splitter = os.path.splitext(os.path.basename(filepath)) @@ -56,42 +81,59 @@ class HTMLArchiver(ChatProcessor): """ if chat_components is None or len(chat_components) == 0: return - - with open(self.save_path, mode='a', encoding='utf-8') as f: - chats = self.processor.process(chat_components).items - for c in chats: - f.writelines( - self._parse_html_line([ - c.datetime, - c.elapsedTime, - c.author.name, - c.message, - c.amountString, - c.author.type, - c.author.channelId] - ) - ) - ''' - #Palliative treatment# - Comment out below line to prevent the table - display from collapsing. - ''' - # f.write('
') + # chats = self.processor.process(chat_components).items + self.body.extend( + (self._parse_html_line(( + c.datetime, + c.elapsedTime, + c.author.name, + self._parse_message(c.messageEx), + c.amountString, + c.author.type, + c.author.channelId) + ) for c in self.processor.process(chat_components).items) + ) def _parse_html_line(self, raw_line): - html = '' - html += ' ' - for cell in raw_line: - html += '' + cell + '' - html += '\n' - return html + return ''.join(('', + ''.join(''.join(('', cell, '')) for cell in raw_line), + '\n')) - def _parse_html_header(self, raw_line): - html = '' - html += '\n' - html += ' ' - for cell in raw_line: - html += '' + cell + '' - html += '\n' - html += '\n' - return html + def _parse_table_header(self, raw_line): + return ''.join(('', + ''.join(''.join(('', cell, '')) for cell in raw_line), + '\n')) + + def _parse_message(self, message_items: list) -> str: + return ''.join(''.join(('')) + if type(item) is dict else item + for item in message_items) + + def _encode_img(self, url): + resp = requests.get(url) + return standard_b64encode(resp.content).decode() + + def _set_emoji_table(self, item: dict): + emoji_id = item['id'] + if emoji_id not in self.emoji_table: + self.emoji_table.setdefault(emoji_id, self._encode_img(item['url'])) + return emoji_id + + def _stylecode(self, name, code, width, height): + return ''.join((".", name, " { display: inline-block; background-image: url(data:image/png;base64,", + code, "); background-repeat: no-repeat; width: ", + str(width), "; height: ", str(height), ";}")) + + def _create_styles(self): + return '\n'.join(('\n')) + + def finalize(self): + self.header.extend([self._create_styles(), '\n']) + self.body.extend(['\n']) + with open(self.save_path, mode='a', encoding='utf-8') as f: + f.writelines(self.header) + f.writelines(self.body) diff --git a/pytchat/tool/extract/extractor.py b/pytchat/tool/extract/extractor.py index 1110e14..2b421af 100644 --- a/pytchat/tool/extract/extractor.py +++ b/pytchat/tool/extract/extractor.py @@ -83,11 +83,13 @@ class Extractor: data = self._execute_extract_operations() if self.processor is None: return data - return self.processor.process( + ret = self.processor.process( [{'video_id': None, 'timeout': 1, 'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}] ) + self.processor.finalize() + return ret def cancel(self): asyncdl.cancel() From 5dfd883fc953fd6dd863ab04f9ab91264770d7f8 Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Sun, 12 Jul 2020 23:47:02 +0900 Subject: [PATCH 3/5] Remove unnecessary line --- pytchat/processors/html_archiver.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pytchat/processors/html_archiver.py b/pytchat/processors/html_archiver.py index dba8d22..037b464 100644 --- a/pytchat/processors/html_archiver.py +++ b/pytchat/processors/html_archiver.py @@ -43,12 +43,11 @@ class HTMLArchiver(ChatProcessor): ''' HTMLArchiver saves chat data as HTML table format. ''' - def __init__(self, save_path): super().__init__() self.save_path = self._checkpath(save_path) self.processor = DefaultProcessor() - self.emoji_table = {} + self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary. self.header = [HEADER_HTML] self.body = ['\n', '\n', self._parse_table_header(fmt_headers)] @@ -81,7 +80,6 @@ class HTMLArchiver(ChatProcessor): """ if chat_components is None or len(chat_components) == 0: return - # chats = self.processor.process(chat_components).items self.body.extend( (self._parse_html_line(( c.datetime, From b7ff2b6537ea7b2a7bde0655658ad2d3fc32ca0a Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Mon, 13 Jul 2020 00:59:20 +0900 Subject: [PATCH 4/5] Restore logging settings --- pytchat/config/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py index 81d91cf..e374bc5 100644 --- a/pytchat/config/__init__.py +++ b/pytchat/config/__init__.py @@ -1,9 +1,9 @@ +import logging from . import mylogger - headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'} -def logger(module_name: str, loglevel=None): +def logger(module_name: str, loglevel=logging.DEBUG): module_logger = mylogger.get_logger(module_name, loglevel=loglevel) return module_logger From 366d75c2bbdf67dd022ad89e7567c94af265494f Mon Sep 17 00:00:00 2001 From: taizan-hokuto <55448286+taizan-hokuto@users.noreply.github.com> Date: Mon, 13 Jul 2020 01:44:49 +0900 Subject: [PATCH 5/5] Update README --- README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 694ad8a..50a26fe 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ pip install pytchat ### CLI One-liner command. -Save chat data to html. +Save chat data to html, with embedded custom emojis. ```bash $ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/" @@ -148,6 +148,20 @@ def main(): if __name__ == '__main__': main() ``` +### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver). +```python +from pytchat import HTMLArchiver, Extractor + +video_id = "*******" +ex = Extractor( + video_id, + div=10, + processor=HTMLArchiver("c:/test.html") +) + +ex.extract() +print("finished.") +``` ## Structure of Default Processor Each item can be got with `items` function. @@ -175,7 +189,7 @@ Each item can be got with `items` function. - +
messageEx strlist of message texts and emoji URLs.list of message texts and emoji dicts(id, txt, url).
timestamp