Merge branch 'develop'

This commit is contained in:
taizan-hokuto
2019-12-22 02:56:54 +09:00
13 changed files with 591 additions and 90 deletions

View File

@@ -8,7 +8,7 @@ pytchat is a python library for fetching youtube live chat
without using youtube api, Selenium or BeautifulSoup. without using youtube api, Selenium or BeautifulSoup.
Other features: Other features:
+ Customizable chat data processors including yt api compatible one. + Customizable chat data processors including youtube api compatible one.
+ Available on asyncio context. + Available on asyncio context.
+ Quick fetching of initial chat data by generating continuation params + Quick fetching of initial chat data by generating continuation params
instead of web scraping. instead of web scraping.
@@ -29,10 +29,10 @@ from pytchat import LiveChat
chat = LiveChat("G1w62uEMZ74") chat = LiveChat("G1w62uEMZ74")
while chat.is_alive(): while chat.is_alive():
data = chat.get() data = chat.get()
for c in data.items: for c in data.items:
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}") print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
data.tick() data.tick()
``` ```
### callback mode ### callback mode
@@ -41,16 +41,16 @@ from pytchat import LiveChat
import time import time
def main(): def main():
chat = LiveChat("G1w62uEMZ74", callback = func) chat = LiveChat("G1w62uEMZ74", callback = func)
while chat.is_alive(): while chat.is_alive():
time.sleep(3) time.sleep(3)
#other background operation. #other background operation.
#callback function is automatically called periodically. #callback function is automatically called periodically.
def func(data): def func(data):
for c in data.items: for c in data.items:
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}") print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
data.tick() data.tick()
``` ```
### asyncio context: ### asyncio context:
@@ -59,16 +59,16 @@ from pytchat import LiveChatAsync
import asyncio import asyncio
async def main(): async def main():
chat = LiveChatAsync("G1w62uEMZ74", callback = func) chat = LiveChatAsync("G1w62uEMZ74", callback = func)
while chat.is_alive(): while chat.is_alive():
await asyncio.sleep(3) await asyncio.sleep(3)
#other background operation. #other background operation.
#callback function is automatically called periodically. #callback function is automatically called periodically.
async def func(data): async def func(data):
for c in data.items: for c in data.items:
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}") print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
await data.tick_async() await data.tick_async()
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(main()) loop.run_until_complete(main())
@@ -80,16 +80,16 @@ loop.run_until_complete(main())
from pytchat import LiveChat, CompatibleProcessor from pytchat import LiveChat, CompatibleProcessor
chat = LiveChat("G1w62uEMZ74", chat = LiveChat("G1w62uEMZ74",
processor = CompatibleProcessor() ) processor = CompatibleProcessor() )
while chat.is_alive(): while chat.is_alive():
data = chat.get() data = chat.get()
polling = data["pollingIntervalMillis"]/1000 polling = data["pollingIntervalMillis"]/1000
for c in data["items"]: for c in data["items"]:
if c.get("snippet"): if c.get("snippet"):
print(f"[{c['authorDetails']['displayName']}]" print(f"[{c['authorDetails']['displayName']}]"
f"-{c['snippet']['displayMessage']}") f"-{c['snippet']['displayMessage']}")
time.sleep(polling/len(data["items"])) time.sleep(polling/len(data["items"]))
``` ```
### replay: ### replay:
@@ -98,21 +98,21 @@ from pytchat import ReplayChatAsync
import asyncio import asyncio
async def main(): async def main():
chat = ReplayChatAsync("G1w62uEMZ74", seektime = 1000, callback = func) chat = ReplayChatAsync("G1w62uEMZ74", seektime = 1000, callback = func)
while chat.is_alive(): while chat.is_alive():
await asyncio.sleep(3) await asyncio.sleep(3)
#other background operation here. #other background operation here.
#callback function is automatically called periodically. #callback function is automatically called periodically.
async def func(data): async def func(data):
for count in range(0,len(data.items)): for count in range(0,len(data.items)):
c= data.items[count] c= data.items[count]
if count!=len(data.items)-1: if count!=len(data.items)-1:
tick=data.items[count+1].timestamp -data.items[count].timestamp tick=data.items[count+1].timestamp -data.items[count].timestamp
else: else:
tick=0 tick=0
print(f"<{c.elapsedTime}> [{c.author.name}]-{c.message} {c.amountString}") print(f"<{c.elapsedTime}> [{c.author.name}]-{c.message} {c.amountString}")
await asyncio.sleep(tick/1000) await asyncio.sleep(tick/1000)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(main()) loop.run_until_complete(main())

View File

@@ -2,7 +2,7 @@
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup. pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.0.3.6' __version__ = '0.0.3.7'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
@@ -18,5 +18,6 @@ from .api import (
ChatProcessor, ChatProcessor,
CompatibleProcessor, CompatibleProcessor,
SimpleDisplayProcessor, SimpleDisplayProcessor,
JsonfileArchiveProcessor JsonfileArchiveProcessor,
SpeedCalculator
) )

View File

@@ -7,4 +7,4 @@ from .processors.default.processor import DefaultProcessor
from .processors.compatible.processor import CompatibleProcessor from .processors.compatible.processor import CompatibleProcessor
from .processors.simple_display_processor import SimpleDisplayProcessor from .processors.simple_display_processor import SimpleDisplayProcessor
from .processors.jsonfile_archive_processor import JsonfileArchiveProcessor from .processors.jsonfile_archive_processor import JsonfileArchiveProcessor
from .processors.speed_calculator import SpeedCalculator

View File

@@ -1,4 +1,4 @@
import logging import logging
LOGGER_MODE = logging.ERROR LOGGER_MODE = None
headers = { headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'} 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}

View File

@@ -10,14 +10,14 @@ class ChatProcessor:
Parameter Parameter
---------- ----------
chat_components: [LIST:component] chat_components: List[component]
component : dict { component : dict {
"video_id" : str "video_id" : str
動画ID 動画ID
"timeout" : int "timeout" : int
次のチャットの再読み込みまでの時間(秒) 次のチャットの再読み込みまでの時間(秒)
"chatdata" : list<object> "chatdata" : List[dict]
チャットデータactionsのリスト チャットデータのリスト
} }
''' '''
pass pass

View File

@@ -4,11 +4,12 @@ from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
from .. chat_processor import ChatProcessor
from ... import mylogger from ... import mylogger
from ... import config from ... import config
logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE) logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE)
class CompatibleProcessor: class CompatibleProcessor(ChatProcessor):
def process(self, chat_components: list): def process(self, chat_components: list):

View File

@@ -4,6 +4,7 @@ from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
from .. chat_processor import ChatProcessor
from ... import config from ... import config
from ... import mylogger from ... import mylogger
logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE) logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE)
@@ -25,7 +26,7 @@ class Chatdata:
return return
await asyncio.sleep(self.interval/len(self.items)) await asyncio.sleep(self.interval/len(self.items))
class DefaultProcessor: class DefaultProcessor(ChatProcessor):
def process(self, chat_components: list): def process(self, chat_components: list):
chatlist = [] chatlist = []

View File

@@ -4,7 +4,7 @@ speedmeter.py
Calculate speed of chat. Calculate speed of chat.
""" """
import calendar, datetime, pytz import calendar, datetime, pytz
from .chat_processor import ChatProcessor
class RingQueue: class RingQueue:
""" """
リング型キュー リング型キュー
@@ -21,7 +21,7 @@ class RingQueue:
キュー内に余裕があるか。キュー内のアイテム個数が、キューの最大個数未満であればTrue。 キュー内に余裕があるか。キュー内のアイテム個数が、キューの最大個数未満であればTrue。
""" """
def __init__(self, capacity = 10): def __init__(self, capacity):
""" """
コンストラクタ コンストラクタ
@@ -77,42 +77,47 @@ class RingQueue:
def item_count(self): def item_count(self):
return len(self.items) return len(self.items)
class SpeedCalculator(RingQueue): class SpeedCalculator(ChatProcessor, RingQueue):
""" """
チャットの勢いを計算するクラス チャットの勢いを計算する
一定期間のチャットデータのうち、最初のチャットの投稿時刻と
最後のチャットの投稿時刻の差を、チャット数で割り返し
1分あたりの速度に換算する。
Parameter Parameter
---------- ----------
格納するチャットブロックの数 capacity : int
RingQueueに格納するチャット勢い算出用データの最大数
""" """
def __init__(self, capacity, video_id): def __init__(self, capacity = 10):
super().__init__(capacity) super().__init__(capacity)
self.video_id=video_id
self.speed = 0 self.speed = 0
def process(self, chat_components: list): def process(self, chat_components: list):
chatdata = []
if chat_components: if chat_components:
for component in chat_components: for component in chat_components:
if component.get("chatdata"):
chatdata = component.get('chatdata') chatdata.extend(component.get("chatdata"))
if chatdata is None:
return self.speed
self.speed = self.calc(chatdata)
return self.speed
def _value(self): self._put_chatdata(chatdata)
self.speed = self._calc_speed()
return self.speed
def _calc_speed(self):
""" """
ActionsQueue内のチャットデータリストから RingQueue内のチャット勢い算出用データリストを元に
チャット速度を計算して返す チャット速度を計算して返す
Return Return
--------------------------- ---------------------------
チャット速度(1分間で換算したチャット数) チャット速度(1分間で換算したチャット数)
""" """
try: try:
#キュー内のactionsの総チャット数 #キュー内の総チャット数
total = sum(item['chat_count'] for item in self.items) total = sum(item['chat_count'] for item in self.items)
#キュー内の最初と最後のチャットの時間差 #キュー内の最初と最後のチャットの時間差
duration = (self.items[self.last_pos]['endtime'] duration = (self.items[self.last_pos]['endtime']
@@ -123,24 +128,20 @@ class SpeedCalculator(RingQueue):
except IndexError: except IndexError:
return 0 return 0
def _get_timestamp(self, action :dict): def _put_chatdata(self, actions):
""" """
チャットデータのtimestampUsecを読み取る チャットデータからタイムスタンプを読み取り、勢い測定用のデータを組み立て、
liveChatTickerSponsorItemRenderer等のtickerデータは時刻格納位置が RingQueueに投入する。
異なるため、時刻データなしとして扱う 200円以上のスパチャはtickerとmessageの2つのデータが生成されるが、
tickerの方は時刻データの場所が異なることを利用し、勢いの集計から除外している。
Parameter
---------
actions : List[dict]
チャットデータ(addChatItemAction) のリスト
""" """
try: def _put_emptydata():
item = action['addChatItemAction']['item']
timestamp = int(item[list(item.keys())[0]]['timestampUsec'])
except (KeyError,TypeError):
return None
return timestamp
def calc(self,actions):
def empty_data():
''' '''
データがない場合にゼロのデータをリングキューに入れる チャットデータがない場合に空のデータをキューに投入する。
''' '''
timestamp_now = calendar.timegm(datetime.datetime. timestamp_now = calendar.timegm(datetime.datetime.
now(pytz.utc).utctimetuple()) now(pytz.utc).utctimetuple())
@@ -149,12 +150,23 @@ class SpeedCalculator(RingQueue):
'starttime':int(timestamp_now), 'starttime':int(timestamp_now),
'endtime':int(timestamp_now) 'endtime':int(timestamp_now)
}) })
return self._value()
def _get_timestamp(action :dict):
"""
チャットデータから時刻データを取り出す。
"""
try:
item = action['addChatItemAction']['item']
timestamp = int(item[list(item.keys())[0]]['timestampUsec'])
except (KeyError,TypeError):
return None
return timestamp
if actions is None or len(actions)==0: if actions is None or len(actions)==0:
return empty_data _put_emptydata()
return
#actions内の時刻データを持つチャットデータの数tickerは除く #actions内の時刻データを持つチャットデータの数
counter=0 counter=0
#actions内の最初のチャットデータの時刻 #actions内の最初のチャットデータの時刻
starttime= None starttime= None
@@ -163,7 +175,7 @@ class SpeedCalculator(RingQueue):
for action in actions: for action in actions:
#チャットデータからtimestampUsecを読み取る #チャットデータからtimestampUsecを読み取る
gettime = self._get_timestamp(action) gettime = _get_timestamp(action)
#時刻のないデータだった場合は次の行のデータで読み取り試行 #時刻のないデータだった場合は次の行のデータで読み取り試行
if gettime is None: if gettime is None:
@@ -177,11 +189,12 @@ class SpeedCalculator(RingQueue):
endtime = gettime endtime = gettime
#チャットの数をインクリメント #チャットの数をインクリメント
counter+=1 counter += 1
#チャット速度用のデータをリングキューに送る #チャット速度用のデータをRingQueueに送る
if starttime is None or endtime is None: if starttime is None or endtime is None:
return empty_data _put_emptydata()
return
self.put({ self.put({
'chat_count':counter, 'chat_count':counter,
@@ -189,4 +202,3 @@ class SpeedCalculator(RingQueue):
'endtime':int(endtime/1000000) 'endtime':int(endtime/1000000)
}) })
return self._value()

View File

@@ -0,0 +1,68 @@
import json
import pytest
import asyncio,aiohttp
from pytchat.parser.live import Parser
from pytchat.processors.compatible.processor import CompatibleProcessor
from pytchat.exceptions import (
NoLivechatRendererException,NoYtinitialdataException,
ResponseContextError, NoContentsException)
from pytchat.processors.speed_calculator import SpeedCalculator
# Module-level Parser instance shared by every test below; each test calls
# parser.parse() on a decoded JSON fixture to obtain its chat data.
parser = Parser()
def test_speed_1(mocker):
    """Verify the speed computed from a regular chat payload.

    The fixture holds 15 chat items spanning 30 seconds, which is
    expected to be reported as 30 chats per minute.
    """
    calculator = SpeedCalculator(capacity=30)
    raw_text = _open_file("tests/testdata/speed/speedtest_normal.json")
    # parse() returns a pair; only the second element (chat data) is needed.
    parsed = parser.parse(json.loads(raw_text))
    chatdata = parsed[1]
    component = {"video_id": "", "timeout": 10, "chatdata": chatdata}
    speed = calculator.process([component])
    assert 30 == speed
def test_speed_2(mocker):
    """Verify that a payload without valid chat data yields a speed of 0."""
    calculator = SpeedCalculator(capacity=30)
    raw_text = _open_file("tests/testdata/speed/speedtest_undefined.json")
    # parse() returns a pair; only the second element (chat data) is needed.
    parsed = parser.parse(json.loads(raw_text))
    chatdata = parsed[1]
    component = {"video_id": "", "timeout": 10, "chatdata": chatdata}
    speed = calculator.process([component])
    assert 0 == speed
def test_speed_3(mocker):
    """Verify that an empty payload yields a speed of 0."""
    calculator = SpeedCalculator(capacity=30)
    raw_text = _open_file("tests/testdata/speed/speedtest_empty.json")
    # parse() returns a pair; only the second element (chat data) is needed.
    parsed = parser.parse(json.loads(raw_text))
    chatdata = parsed[1]
    component = {"video_id": "", "timeout": 10, "chatdata": chatdata}
    speed = calculator.process([component])
    assert 0 == speed
def _open_file(path):
    """Return the full text content of the UTF-8 encoded file at *path*."""
    with open(path, encoding='utf-8', mode='r') as handle:
        content = handle.read()
    return content

164
tests/testdata/chat.json vendored Normal file
View File

@@ -0,0 +1,164 @@
{
"timing": {
"info": {
"st": 164
}
},
"csn": "",
"response": {
"responseContext": {
"serviceTrackingParams": [{
"service": "CSI",
"params": [{
"key": "GetLiveChat_rid",
"value": ""
}, {
"key": "c",
"value": "WEB"
}, {
"key": "cver",
"value": "2.20191219.03.01"
}, {
"key": "yt_li",
"value": "0"
}]
}, {
"service": "GFEEDBACK",
"params": [{
"key": "e",
"value": ""
}, {
"key": "logged_in",
"value": "0"
}]
}, {
"service": "GUIDED_HELP",
"params": [{
"key": "logged_in",
"value": "0"
}]
}, {
"service": "ECATCHER",
"params": [{
"key": "client.name",
"value": "WEB"
}, {
"key": "client.version",
"value": "2.2"
}, {
"key": "innertube.build.changelist",
"value": "228"
}, {
"key": "innertube.build.experiments.source_version",
"value": "2858"
}, {
"key": "innertube.build.label",
"value": "youtube.ytfe.innertube_"
}, {
"key": "innertube.build.timestamp",
"value": "154"
}, {
"key": "innertube.build.variants.checksum",
"value": "e"
}, {
"key": "innertube.run.job",
"value": "ytfe-innertube-replica-only.ytfe"
}]
}],
"webResponseContextExtensionData": {
"ytConfigData": {
"csn": "ADw",
"visitorData": "%3D%3D"
}
}
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [{
"timedContinuationData": {
"timeoutMs": 10000,
"continuation": "continuation"
}
}],
"actions": [{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"message": {
"runs": [{
"text": "message"
}]
},
"authorName": {
"simpleText": "authorName"
},
"authorPhoto": {
"thumbnails": [{
"url": "https://yt3.ggpht.com/photo.jpg",
"width": 32,
"height": 32
}, {
"url": "https://yt3.ggpht.com/photo.jpg",
"width": 64,
"height": 64
}]
},
"contextMenuEndpoint": {
"commandMetadata": {
"webCommandMetadata": {
"ignoreNavigation": true
}
},
"liveChatItemContextMenuEndpoint": {
"params": "params"
}
},
"id": "id",
"timestampUsec": "1576851922945411",
"authorBadges": [{
"liveChatAuthorBadgeRenderer": {
"customThumbnail": {
"thumbnails": [{
"url": "https://yt3.ggpht.com/photo.jpg"
}, {
"url": "https://yt3.ggpht.com/photo.jpg"
}]
},
"tooltip": "メンバー6 か月)",
"accessibility": {
"accessibilityData": {
"label": "メンバー6 か月)"
}
}
}
}],
"authorExternalChannelId": "UC",
"contextMenuAccessibility": {
"accessibilityData": {
"label": "コメントの操作"
}
}
}
},
"clientId": "00000000000000000000"
}
}
]}
},
"xsrf_token": "xsrf_token",
"url": "/live_chat/get_live_chat?continuation=0",
"endpoint": {
"commandMetadata": {
"webCommandMetadata": {
"url": "/live_chat/get_live_chat?continuation=0",
"rootVe": 0
}
},
"urlEndpoint": {
"url": "/live_chat/get_live_chat?continuation=0"
}
}
}
}

View File

@@ -0,0 +1,24 @@
{
"timing": {
"info": {
"st": 164
}
},
"csn": "",
"response": {
"responseContext": {
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [{
"timedContinuationData": {
"timeoutMs": 10000,
"continuation": "continuation"
}
}]
}
}
}
}

View File

@@ -0,0 +1,188 @@
{
"timing": {
"info": {
"st": 164
}
},
"csn": "",
"response": {
"responseContext": {
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [{
"timedContinuationData": {
"timeoutMs": 10000,
"continuation": "continuation"
}
}],
"actions": [{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000000000000"
}
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatTextMessageRenderer": {
"timestampUsec": "1500000030000000"
}
}
}
}
]}
}
}
}

View File

@@ -0,0 +1,42 @@
{
"timing": {
"info": {
"st": 164
}
},
"csn": "",
"response": {
"responseContext": {
},
"continuationContents": {
"liveChatContinuation": {
"continuations": [{
"timedContinuationData": {
"timeoutMs": 10000,
"continuation": "continuation"
}
}],
"actions": [{
"addChatItemAction": {
"liveChatPlaceholderItemRenderer": {
"id": "",
"timestampUsec": "1500000000000000"
}
}
},
{
"addChatItemAction": {
"item": {
"liveChatPlaceholderItemRenderer": {
"id": "",
"timestampUsec": "1500000030000000"
}
}
}
}
]}
}
}
}