Merge branch 'feature/emoji_embedding' into develop
This commit is contained in:
18
README.md
18
README.md
@@ -27,7 +27,7 @@ pip install pytchat
|
|||||||
### CLI
|
### CLI
|
||||||
|
|
||||||
One-liner command.
|
One-liner command.
|
||||||
Save chat data to html.
|
Save chat data to html, with embedded custom emojis.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/"
|
$ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/"
|
||||||
@@ -148,6 +148,20 @@ def main():
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
```
|
```
|
||||||
|
### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
|
||||||
|
```python
|
||||||
|
from pytchat import HTMLArchiver, Extractor
|
||||||
|
|
||||||
|
video_id = "*******"
|
||||||
|
ex = Extractor(
|
||||||
|
video_id,
|
||||||
|
div=10,
|
||||||
|
processor=HTMLArchiver("c:/test.html")
|
||||||
|
)
|
||||||
|
|
||||||
|
ex.extract()
|
||||||
|
print("finished.")
|
||||||
|
```
|
||||||
|
|
||||||
## Structure of Default Processor
|
## Structure of Default Processor
|
||||||
Each item can be got with `items` function.
|
Each item can be got with `items` function.
|
||||||
@@ -175,7 +189,7 @@ Each item can be got with `items` function.
|
|||||||
<tr>
|
<tr>
|
||||||
<td>messageEx</td>
|
<td>messageEx</td>
|
||||||
<td>str</td>
|
<td>str</td>
|
||||||
<td>list of message texts and emoji URLs.</td>
|
<td>list of message texts and emoji dicts(id, txt, url).</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>timestamp</td>
|
<td>timestamp</td>
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
|
import logging
|
||||||
from . import mylogger
|
from . import mylogger
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
|
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
|
||||||
|
|
||||||
|
|
||||||
def logger(module_name: str, loglevel=None):
|
def logger(module_name: str, loglevel=logging.DEBUG):
|
||||||
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
|
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
|
||||||
return module_logger
|
return module_logger
|
||||||
|
|||||||
@@ -325,7 +325,8 @@ class LiveChatAsync:
|
|||||||
self._pauser.put_nowait(None)
|
self._pauser.put_nowait(None)
|
||||||
self._is_alive = False
|
self._is_alive = False
|
||||||
self._buffer.put_nowait({})
|
self._buffer.put_nowait({})
|
||||||
|
self.processor.finalize()
|
||||||
|
|
||||||
def _task_finished(self):
|
def _task_finished(self):
|
||||||
'''
|
'''
|
||||||
Listenerを終了する。
|
Listenerを終了する。
|
||||||
|
|||||||
@@ -316,6 +316,7 @@ class LiveChat:
|
|||||||
self._is_alive = False
|
self._is_alive = False
|
||||||
self._buffer.put({})
|
self._buffer.put({})
|
||||||
self._event.set()
|
self._event.set()
|
||||||
|
self.processor.finalize()
|
||||||
|
|
||||||
def _task_finished(self):
|
def _task_finished(self):
|
||||||
'''
|
'''
|
||||||
|
|||||||
@@ -21,3 +21,10 @@ class ChatProcessor:
|
|||||||
}
|
}
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def finalize(self, *args, **kwargs):
|
||||||
|
'''
|
||||||
|
Interface for finalizing the process.
|
||||||
|
Called when chat fetching finished.
|
||||||
|
'''
|
||||||
|
pass
|
||||||
|
|||||||
@@ -52,8 +52,11 @@ class BaseRenderer:
|
|||||||
if r:
|
if r:
|
||||||
if r.get('emoji'):
|
if r.get('emoji'):
|
||||||
message += r['emoji'].get('shortcuts', [''])[0]
|
message += r['emoji'].get('shortcuts', [''])[0]
|
||||||
message_ex.append(
|
message_ex.append({
|
||||||
r['emoji']['image']['thumbnails'][1].get('url'))
|
'id': r['emoji'].get('emojiId').split('/')[-1],
|
||||||
|
'txt': r['emoji'].get('shortcuts', [''])[0],
|
||||||
|
'url': r['emoji']['image']['thumbnails'][0].get('url')
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
message += r.get('text', '')
|
message += r.get('text', '')
|
||||||
message_ex.append(r.get('text', ''))
|
message_ex.append(r.get('text', ''))
|
||||||
|
|||||||
@@ -1,31 +1,55 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import requests
|
||||||
|
from base64 import standard_b64encode
|
||||||
from .chat_processor import ChatProcessor
|
from .chat_processor import ChatProcessor
|
||||||
from .default.processor import DefaultProcessor
|
from .default.processor import DefaultProcessor
|
||||||
|
|
||||||
|
|
||||||
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
|
||||||
fmt_headers = ['datetime', 'elapsed', 'authorName',
|
fmt_headers = ['datetime', 'elapsed', 'authorName',
|
||||||
'message', 'superchat', 'type', 'authorChannel']
|
'message', 'superchat', 'type', 'authorChannel']
|
||||||
|
|
||||||
HEADER_HTML = '''
|
HEADER_HTML = '''
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
'''
|
||||||
|
|
||||||
|
TABLE_CSS = '''
|
||||||
|
table.css {
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.css thead{
|
||||||
|
border-collapse: collapse;
|
||||||
|
border: 1px solid #000
|
||||||
|
}
|
||||||
|
|
||||||
|
table.css tr td{
|
||||||
|
padding: 0.3em;
|
||||||
|
border: 1px solid #000
|
||||||
|
}
|
||||||
|
|
||||||
|
table.css th{
|
||||||
|
padding: 0.3em;
|
||||||
|
border: 1px solid #000
|
||||||
|
}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
class HTMLArchiver(ChatProcessor):
|
class HTMLArchiver(ChatProcessor):
|
||||||
'''
|
'''
|
||||||
HtmlArchiver saves chat data as HTML table format.
|
HTMLArchiver saves chat data as HTML table format.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, save_path):
|
def __init__(self, save_path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.save_path = self._checkpath(save_path)
|
self.save_path = self._checkpath(save_path)
|
||||||
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
|
||||||
f.write(HEADER_HTML)
|
|
||||||
f.write('<table border="1" style="border-collapse: collapse">')
|
|
||||||
f.writelines(self._parse_html_header(fmt_headers))
|
|
||||||
self.processor = DefaultProcessor()
|
self.processor = DefaultProcessor()
|
||||||
|
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||||
|
self.header = [HEADER_HTML]
|
||||||
|
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
||||||
|
|
||||||
def _checkpath(self, filepath):
|
def _checkpath(self, filepath):
|
||||||
splitter = os.path.splitext(os.path.basename(filepath))
|
splitter = os.path.splitext(os.path.basename(filepath))
|
||||||
@@ -56,42 +80,58 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
"""
|
"""
|
||||||
if chat_components is None or len(chat_components) == 0:
|
if chat_components is None or len(chat_components) == 0:
|
||||||
return
|
return
|
||||||
|
self.body.extend(
|
||||||
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
(self._parse_html_line((
|
||||||
chats = self.processor.process(chat_components).items
|
c.datetime,
|
||||||
for c in chats:
|
c.elapsedTime,
|
||||||
f.writelines(
|
c.author.name,
|
||||||
self._parse_html_line([
|
self._parse_message(c.messageEx),
|
||||||
c.datetime,
|
c.amountString,
|
||||||
c.elapsedTime,
|
c.author.type,
|
||||||
c.author.name,
|
c.author.channelId)
|
||||||
c.message,
|
) for c in self.processor.process(chat_components).items)
|
||||||
c.amountString,
|
)
|
||||||
c.author.type,
|
|
||||||
c.author.channelId]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
#Palliative treatment#
|
|
||||||
Comment out below line to prevent the table
|
|
||||||
display from collapsing.
|
|
||||||
'''
|
|
||||||
# f.write('</table>')
|
|
||||||
|
|
||||||
def _parse_html_line(self, raw_line):
|
def _parse_html_line(self, raw_line):
|
||||||
html = ''
|
return ''.join(('<tr>',
|
||||||
html += ' <tr>'
|
''.join(''.join(('<td>', cell, '</td>')) for cell in raw_line),
|
||||||
for cell in raw_line:
|
'</tr>\n'))
|
||||||
html += '<td>' + cell + '</td>'
|
|
||||||
html += '</tr>\n'
|
|
||||||
return html
|
|
||||||
|
|
||||||
def _parse_html_header(self, raw_line):
|
def _parse_table_header(self, raw_line):
|
||||||
html = ''
|
return ''.join(('<thead><tr>',
|
||||||
html += '<thead>\n'
|
''.join(''.join(('<th>', cell, '</th>')) for cell in raw_line),
|
||||||
html += ' <tr>'
|
'</tr></thead>\n'))
|
||||||
for cell in raw_line:
|
|
||||||
html += '<th>' + cell + '</th>'
|
def _parse_message(self, message_items: list) -> str:
|
||||||
html += '</tr>\n'
|
return ''.join(''.join(('<span class="', self._set_emoji_table(item), '" title="', item['txt'], '"></span>'))
|
||||||
html += '</thead>\n'
|
if type(item) is dict else item
|
||||||
return html
|
for item in message_items)
|
||||||
|
|
||||||
|
def _encode_img(self, url):
|
||||||
|
resp = requests.get(url)
|
||||||
|
return standard_b64encode(resp.content).decode()
|
||||||
|
|
||||||
|
def _set_emoji_table(self, item: dict):
|
||||||
|
emoji_id = item['id']
|
||||||
|
if emoji_id not in self.emoji_table:
|
||||||
|
self.emoji_table.setdefault(emoji_id, self._encode_img(item['url']))
|
||||||
|
return emoji_id
|
||||||
|
|
||||||
|
def _stylecode(self, name, code, width, height):
|
||||||
|
return ''.join((".", name, " { display: inline-block; background-image: url(data:image/png;base64,",
|
||||||
|
code, "); background-repeat: no-repeat; width: ",
|
||||||
|
str(width), "; height: ", str(height), ";}"))
|
||||||
|
|
||||||
|
def _create_styles(self):
|
||||||
|
return '\n'.join(('<style type="text/css">',
|
||||||
|
TABLE_CSS,
|
||||||
|
'\n'.join(self._stylecode(key, self.emoji_table[key], 24, 24)
|
||||||
|
for key in self.emoji_table.keys()),
|
||||||
|
'</style>\n'))
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
self.header.extend([self._create_styles(), '</head>\n'])
|
||||||
|
self.body.extend(['</table>\n</body>'])
|
||||||
|
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
||||||
|
f.writelines(self.header)
|
||||||
|
f.writelines(self.body)
|
||||||
|
|||||||
@@ -83,11 +83,13 @@ class Extractor:
|
|||||||
data = self._execute_extract_operations()
|
data = self._execute_extract_operations()
|
||||||
if self.processor is None:
|
if self.processor is None:
|
||||||
return data
|
return data
|
||||||
return self.processor.process(
|
ret = self.processor.process(
|
||||||
[{'video_id': None,
|
[{'video_id': None,
|
||||||
'timeout': 1,
|
'timeout': 1,
|
||||||
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
||||||
)
|
)
|
||||||
|
self.processor.finalize()
|
||||||
|
return ret
|
||||||
|
|
||||||
def cancel(self):
|
def cancel(self):
|
||||||
asyncdl.cancel()
|
asyncdl.cancel()
|
||||||
|
|||||||
Reference in New Issue
Block a user