Implement Superchat Calculator

This commit is contained in:
taizan-hokuto
2020-02-24 13:56:58 +09:00
parent af4afb4636
commit 3c95242ddf
14 changed files with 3744 additions and 38 deletions

View File

@@ -178,20 +178,20 @@ class LiveChatAsync:
} }
time_mark =time.time() time_mark =time.time()
if self._direct_mode: if self._direct_mode:
await self._callback( processed_chat = self.processor.process([chat_component])
self.processor.process([chat_component]) if isinstance(processed_chat,tuple):
) await self._callback(*processed_chat)
else:
await self._callback(processed_chat)
else: else:
await self._buffer.put(chat_component) await self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark) diff_time = timeout - (time.time()-time_mark)
await asyncio.sleep(diff_time) await asyncio.sleep(diff_time)
continuation = metadata.get('continuation') continuation = metadata.get('continuation')
except ChatParseException as e: except ChatParseException as e:
#self.terminate()
self._logger.debug(f"[{self.video_id}]{str(e)}") self._logger.debug(f"[{self.video_id}]{str(e)}")
return return
except (TypeError , json.JSONDecodeError) : except (TypeError , json.JSONDecodeError) :
#self.terminate()
self._logger.error(f"{traceback.format_exc(limit = -1)}") self._logger.error(f"{traceback.format_exc(limit = -1)}")
return return
@@ -211,13 +211,13 @@ class LiveChatAsync:
return continuation return continuation
async def _get_contents(self, continuation, session, headers): async def _get_contents(self, continuation, session, headers):
'''Get 'contents' dict from livechat json. '''Get 'continuationContents' from livechat json.
If contents is None at first fetching, If contents is None at first fetching,
try to fetch archive chat data. try to fetch archive chat data.
Return: Return:
------- -------
'contents' dict which includes metadata & chatdata. 'continuationContents' which includes metadata & chatdata.
''' '''
livechat_json = (await livechat_json = (await
self._get_livechat_json(continuation, session, headers) self._get_livechat_json(continuation, session, headers)
@@ -275,8 +275,11 @@ class LiveChatAsync:
""" """
while self.is_alive(): while self.is_alive():
items = await self._buffer.get() items = await self._buffer.get()
data = self.processor.process(items) processed_chat = self.processor.process(items)
await callback(data) if isinstance(processed_chat, tuple):
await self._callback(*processed_chat)
else:
await self._callback(processed_chat)
async def get(self): async def get(self):
""" bufferからデータを取り出し、processorに投げ、 """ bufferからデータを取り出し、processorに投げ、

View File

@@ -174,9 +174,11 @@ class LiveChat:
} }
time_mark =time.time() time_mark =time.time()
if self._direct_mode: if self._direct_mode:
self._callback( processed_chat = self.processor.process([chat_component])
self.processor.process([chat_component]) if isinstance(processed_chat,tuple):
) self._callback(*processed_chat)
else:
self._callback(processed_chat)
else: else:
self._buffer.put(chat_component) self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark) diff_time = timeout - (time.time()-time_mark)
@@ -204,13 +206,13 @@ class LiveChat:
return continuation return continuation
def _get_contents(self, continuation, session, headers): def _get_contents(self, continuation, session, headers):
'''Get 'contents' dict from livechat json. '''Get 'continuationContents' from livechat json.
If contents is None at first fetching, If contents is None at first fetching,
try to fetch archive chat data. try to fetch archive chat data.
Return: Return:
------- -------
'contents' dict which includes metadata & chatdata. 'continuationContents' which includes metadata & chat data.
''' '''
livechat_json = ( livechat_json = (
self._get_livechat_json(continuation, session, headers) self._get_livechat_json(continuation, session, headers)
@@ -268,8 +270,11 @@ class LiveChat:
""" """
while self.is_alive(): while self.is_alive():
items = self._buffer.get() items = self._buffer.get()
data = self.processor.process(items) processed_chat = self.processor.process(items)
callback(data) if isinstance(processed_chat, tuple):
self._callback(*processed_chat)
else:
self._callback(processed_chat)
def get(self): def get(self):
""" bufferからデータを取り出し、processorに投げ、 """ bufferからデータを取り出し、processorに投げ、

View File

@@ -1,23 +1,22 @@
class ChatProcessor: class ChatProcessor:
''' '''
Listenerからチャットデータactionsを受け取り Abstract class that processes chat data.
チャットデータを加工するクラスの抽象クラス Receive chat data (actions) from Listener.
''' '''
def process(self, chat_components: list): def process(self, chat_components: list):
''' '''
チャットデータの加工を表すインターフェース。 Interface that represents processing of chat data.
LiveChatオブジェクトから呼び出される。 Called from LiveChat object.
Parameter Parameter
---------- ----------
chat_components: List[component] chat_components: List[component]
component : dict { component : dict {
"video_id" : str "video_id" : str
動画ID
"timeout" : int "timeout" : int
次のチャットの再読み込みまでの時間(秒) Time to fetch next chat (seconds)
"chatdata" : List[dict] "chatdata" : List[dict]
チャットデータのリスト List of chat data.
} }
''' '''
pass pass

View File

@@ -35,7 +35,6 @@ class DefaultProcessor(ChatProcessor):
for component in chat_components: for component in chat_components:
timeout += component.get('timeout', 0) timeout += component.get('timeout', 0)
chatdata = component.get('chatdata') chatdata = component.get('chatdata')
if chatdata is None: continue if chatdata is None: continue
for action in chatdata: for action in chatdata:
if action is None: continue if action is None: continue

View File

@@ -0,0 +1,73 @@
import re
from pytchat.processors.chat_processor import ChatProcessor
# Matches a SuperChat amount label such as "$1,234.50".
#   group(1): currency symbol / prefix (any run of non-digits, may be empty)
#   group(2): the amount, using "," as the thousands separator and at most
#             one optional fractional part
# Only a single fractional part is accepted: the earlier `(\.\d*)*` form also
# matched text like "1.2.3", which float() cannot convert.
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)?\b)$")

# Key path from a chat action down to a paid-message renderer.
items_paid = [
    'addChatItemAction',
    'item',
    'liveChatPaidMessageRenderer'
]

# Key path from a chat action down to a paid-sticker renderer.
items_sticker = [
    'addChatItemAction',
    'item',
    'liveChatPaidStickerRenderer'
]
class Calculator(ChatProcessor):
    """
    Calculate the amount of SuperChat by currency.

    Totals are kept in `self.results` and keep accumulating across
    successive `process()` calls on the same instance.
    """

    def __init__(self):
        # key: currency symbol, value: running total for that symbol.
        self.results = {}

    def process(self, chat_components: list):
        """
        Add every paid message / paid sticker found in `chat_components`
        to the running totals.

        Parameter
        ----------
        chat_components : List[dict] or None
            Each component is read for its 'chatdata' list of chat actions.
            Components without 'chatdata' are skipped; None leaves the
            totals untouched.

        Return
        ------------
        results : dict :
            List of amount by currency.
            key: currency symbol, value: total amount.
            The same dict object (`self.results`) is returned on every call.
        """
        if chat_components is None:
            return self.results
        for component in chat_components:
            chatdata = component.get('chatdata')
            if chatdata is None:
                continue
            for action in chatdata:
                # An action carries either a paid message or a paid sticker.
                renderer = (self._get_item(action, items_paid)
                            or self._get_item(action, items_sticker))
                if renderer is None:
                    continue
                symbol, amount = self._parse(renderer)
                self.results[symbol] = self.results.get(symbol, 0) + amount
        return self.results

    def _parse(self, renderer):
        """
        Split the purchase amount label of `renderer` into symbol and amount.

        Return
        ------------
        (symbol, amount) : (str, float)
            ("", 0.0) when the label is missing, is not a string, does not
            match `superchat_regex`, or its numeral cannot be converted.
        """
        unparsable = ("", 0.0)
        try:
            purchase_amount_text = renderer["purchaseAmountText"]["simpleText"]
        except (KeyError, TypeError):
            # Renderer without the expected nested label.
            return unparsable
        if not isinstance(purchase_amount_text, str):
            return unparsable
        m = superchat_regex.search(purchase_amount_text)
        if m is None:
            return unparsable
        try:
            amount = float(m.group(2).replace(',', ''))
        except ValueError:
            # The pattern accepted a numeral that float() rejects.
            return unparsable
        return m.group(1), amount

    def _get_item(self, dict_body, items: list):
        """
        Walk `dict_body` along the path `items` and return what is found.

        dict levels are looked up by key; list levels are indexed when the
        path element is an int. Returns None as soon as a step cannot be
        followed (missing key, index out of range, or a type mismatch).
        """
        for item in items:
            if dict_body is None:
                break
            if isinstance(dict_body, dict):
                dict_body = dict_body.get(item)
                continue
            if isinstance(item, int) and isinstance(dict_body, list):
                try:
                    dict_body = dict_body[item]
                except IndexError:
                    # Covers both too-large and too-negative indexes.
                    return None
                continue
            return None
        return dict_body

View File

@@ -38,6 +38,9 @@ class Block:
during_split : bool : during_split : bool :
whether this block is in the process of during_split. whether this block is in the process of during_split.
while True, this block is excluded from duplicate split procedure. while True, this block is excluded from duplicate split procedure.
seektime : float :
the last position of this block(seconds) already fetched.
""" """
__slots__ = ['first','last','end','continuation','chat_data','remaining', __slots__ = ['first','last','end','continuation','chat_data','remaining',
@@ -45,7 +48,7 @@ class Block:
def __init__(self, first = 0, last = 0, end = 0, def __init__(self, first = 0, last = 0, end = 0,
continuation = '', chat_data = [], is_last = False, continuation = '', chat_data = [], is_last = False,
during_split = False,seektime = None): during_split = False, seektime = None):
self.first = first self.first = first
self.last = last self.last = last
self.end = end self.end = end

View File

@@ -34,7 +34,6 @@ class DownloadWorker:
self.video_id:str = video_id self.video_id:str = video_id
self.parent_block:Block = None self.parent_block:Block = None
async def run(self, session): async def run(self, session):
while self.block.continuation: while self.block.continuation:
patch = await self.fetch( patch = await self.fetch(
@@ -44,11 +43,3 @@ class DownloadWorker:
self.block.done = True self.block.done = True
def fd(name,mes,src,patch,end):
def offset(chats):
if len(chats)==0:
return None,None
return parser.get_offset(chats[0]),parser.get_offset(chats[-1])
with open("v://tlog.csv",encoding="utf-8",mode="a") as f:
f.write(f"WORKER,{name},mes,{mes},edge,{offset(src)[1]},first,{offset(patch)[0]},last,{offset(patch)[1]},end,{end}\n")

View File

@@ -35,10 +35,7 @@ def parse(jsn):
metadata = cont.get('liveChatReplayContinuationData') metadata = cont.get('liveChatReplayContinuationData')
if metadata: if metadata:
continuation = metadata.get("continuation") continuation = metadata.get("continuation")
#print(continuation)
actions = contents['liveChatContinuation'].get('actions') actions = contents['liveChatContinuation'].get('actions')
# print(list(actions[0]['replayChatItemAction']["actions"][0].values()
# )[0]['item'].get("liveChatPaidMessageRenderer"))
if continuation: if continuation:
return continuation, [action["replayChatItemAction"]["actions"][0] return continuation, [action["replayChatItemAction"]["actions"][0]
for action in actions for action in actions

View File

@@ -8,7 +8,6 @@ def test_arcparam_0(mocker):
param = arcparam.getparam("01234567890",-1) param = arcparam.getparam("01234567890",-1)
assert param == "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoADAAOABAAEgEUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA" assert param == "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoADAAOABAAEgEUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA"
def test_arcparam_1(mocker): def test_arcparam_1(mocker):
param = arcparam.getparam("01234567890", seektime = 100000) param = arcparam.getparam("01234567890", seektime = 100000)
assert param == "op2w0wR3GjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QogNDbw_QCMAA4AEAASANSHAgAEAAYACAAKg5zdGF0aWNjaGVja3N1bUAAWANgAWgAcgQIARAAeAA%3D" assert param == "op2w0wR3GjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QogNDbw_QCMAA4AEAASANSHAgAEAAYACAAKg5zdGF0aWNjaGVja3N1bUAAWANgAWgAcgQIARAAeAA%3D"

View File

@@ -0,0 +1,138 @@
from pytchat.processors.superchat.calculator import Calculator
# Bound method under test; Calculator() is constructed without arguments.
get_item = Calculator()._get_item
# Nested fixture mixing dicts, lists, an empty list and a list inside a list.
dict_test = {
    'root': {
        'node0': 'value0',
        'node1': 'value1',
        'node2': {'node2-0': 'value2-0'},
        'node3': [
            {'node3-0': 'value3-0'},
            {'node3-1': {'node3-1-0': 'value3-1-0'}},
        ],
        'node4': [],
        'node5': [
            [
                {'node5-1-0': 'value5-1-0'},
                {'node5-1-1': 'value5-1-1'},
            ],
            {'node5-0': 'value5-0'},
        ],
    },
}

# Key paths through `dict_test`; int elements index into lists.
items_test0 = ['root', 'node1']
items_test_not_found0 = ['root', 'other_data']
items_test_nest = ['root', 'node2', 'node2-0']
items_test_list0 = ['root', 'node3', 1, 'node3-1']
items_test_list1 = ['root', 'node3', 1, 'node3-1', 'node3-1-0']
items_test_list2 = ['root', 'node4', None]
items_test_list3 = ['root', 'node4']
items_test_list_nest = ['root', 'node5', 0, 1, 'node5-1-1']
items_test_list_nest_not_found1 = ['root', 'node5', 0, 1, 'node5-1-1', 'nodez']
items_test_not_found1 = ['root', 'node3', 2, 'node3-1', 'node3-1-0']
items_test_not_found2 = ['root', 'node3', 2, 'node3-1', 'node3-1-0', 'nodex']
def test_get_items_0():
    """A two-level dict path resolves to its leaf value."""
    assert get_item(dict_test, items_test0) == 'value1'


def test_get_items_1():
    """A missing key yields None."""
    assert get_item(dict_test, items_test_not_found0) is None


def test_get_items_2():
    """A three-level dict path resolves to its leaf value."""
    assert get_item(dict_test, items_test_nest) == 'value2-0'


def test_get_items_3():
    """An int path element indexes into a list; the result may be a dict."""
    assert get_item(dict_test, items_test_list0) == {'node3-1-0': 'value3-1-0'}


def test_get_items_4():
    """The path may continue into the dict found inside a list."""
    assert get_item(dict_test, items_test_list1) == 'value3-1-0'


def test_get_items_5():
    """A list index past the end yields None."""
    assert get_item(dict_test, items_test_not_found1) is None


def test_get_items_6():
    """A longer path behind an out-of-range index still yields None."""
    assert get_item(dict_test, items_test_not_found2) is None


def test_get_items_7():
    """A non-int path element applied to a list yields None."""
    assert get_item(dict_test, items_test_list2) is None


def test_get_items_8():
    """Consecutive int elements index through nested lists."""
    assert get_item(dict_test, items_test_list_nest) == 'value5-1-1'


def test_get_items_9():
    """Descending past a string leaf yields None."""
    assert get_item(dict_test, items_test_list_nest_not_found1) is None


def test_get_items_10():
    """A path ending on an empty list returns that empty list."""
    assert get_item(dict_test, items_test_list3) == []

View File

@@ -0,0 +1,68 @@
import json
from pytchat.parser.live import Parser
from pytchat.processors.superchat.calculator import Calculator
from pytchat.exceptions import ChatParseException
# Bound method under test; Calculator() is constructed without arguments.
parse = Calculator()._parse
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def load_chatdata(filepath):
    """Load the JSON file at `filepath` and return its parsed chat data.

    The file is decoded as JSON, passed through a replay-mode `Parser`,
    and element [1] of the `Parser.parse` result is returned.
    """
    replay_parser = Parser(is_replay=True)
    raw_json = json.loads(_open_file(filepath))
    contents = replay_parser.get_contents(raw_json)
    parsed = replay_parser.parse(contents)
    return parsed[1]
def test_parse_1():
    """A symbol followed by a comma-grouped amount is split into both parts."""
    renderer = {"purchaseAmountText": {"simpleText": "¥2,000"}}
    symbol, amount = parse(renderer)
    # The non-digit prefix of the label is returned as the symbol, so the
    # expected value is the currency sign itself, not an empty string.
    assert symbol == '¥'
    assert amount == 2000.0
def test_parse_2():
    """A prefix containing a newline is returned verbatim as the symbol."""
    label = {"simpleText": "ABC\x0a200"}
    symbol, amount = parse({"purchaseAmountText": label})
    assert symbol == 'ABC\x0a'
    assert amount == 200.0
def test_process_0():
    """
    parse superchat data
    """
    chat_component = {
        'video_id': '',
        'timeout': 10,
        # Forward slashes resolve on every platform; the backslash form
        # is only a directory path on Windows.
        'chatdata': load_chatdata("tests/testdata/calculator/superchat_0.json")
    }
    # NOTE(review): both expected keys are empty strings, so this literal
    # collapses to {'': 2.0}. The currency symbols look lost -- confirm
    # the intended keys against superchat_0.json.
    assert Calculator().process([chat_component])=={'': 6800.0, '': 2.0}
def test_process_1():
    """
    parse no superchat data
    """
    chat_component = {
        'video_id': '',
        'timeout': 10,
        # Forward slashes resolve on every platform; the backslash form
        # is only a directory path on Windows.
        'chatdata': load_chatdata("tests/testdata/calculator/text_only.json")
    }
    # Text-only chat contains no paid items, so no totals are recorded.
    assert Calculator().process([chat_component]) == {}
def test_process_2():
    """
    try to parse after replay end

    Loading the fixture is expected to raise ChatParseException; the test
    fails if loading completes without raising.
    """
    try:
        # Forward slashes resolve on every platform; the backslash form
        # is only a directory path on Windows.
        load_chatdata("tests/testdata/calculator/replay_end.json")
    except ChatParseException:
        pass
    else:
        # The original placed `assert False` directly after loading, which
        # made the following Calculator().process(...) call unreachable.
        raise AssertionError(
            "ChatParseException was not raised for replay_end.json")

View File

@@ -0,0 +1,18 @@
{
"response": {
"responseContext": {
"webResponseContextExtensionData": ""
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [
{
"playerSeekContinuationData": {
"continuation": "___reload_continuation___"
}
}
]
}
}
}
}

3324
tests/testdata/calculator/superchat_0.json vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,89 @@
{
"response": {
"responseContext": {
"webResponseContextExtensionData": ""
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [
{
"invalidationContinuationData": {
"invalidationId": {
"objectSource": 1000,
"objectId": "___objectId___",
"topic": "chat~00000000000~0000000",
"subscribeToGcmTopics": true,
"protoCreationTimestampMs": "1577804400000"
},
"timeoutMs": 10000,
"continuation": "___continuation___"
}
}
],
"actions": [
{
"replayChatItemAction": {
"actions": [
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"message": {
"runs": [
{
"text": "dummy_message"
}
]
},
"authorName": {
"simpleText": "author_name"
},
"authorPhoto": {
"thumbnails": [
{
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
"width": 32,
"height": 32
},
{
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
"width": 64,
"height": 64
}
]
},
"contextMenuEndpoint": {
"commandMetadata": {
"webCommandMetadata": {
"ignoreNavigation": true
}
},
"liveChatItemContextMenuEndpoint": {
"params": "___params___"
}
},
"id": "dummy_id",
"timestampUsec": 0,
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
"contextMenuAccessibility": {
"accessibilityData": {
"label": "コメントの操作"
}
},
"timestampText": {
"simpleText": "0:00"
}
}
},
"clientId": "dummy_client_id"
}
}
],
"videoOffsetTimeMsec": "10000"
}
}
]
}
}
}
}