This commit is contained in:
taizan-hokuto
2020-02-02 22:36:26 +09:00
parent e8510f1116
commit 24f08ecbdb
15 changed files with 19553 additions and 26 deletions

View File

@@ -73,7 +73,6 @@ class LiveChatAsync:
''' '''
_setup_finished = False _setup_finished = False
_logger = config.logger(__name__)
def __init__(self, video_id, def __init__(self, video_id,
seektime = 0, seektime = 0,

View File

@@ -74,7 +74,6 @@ class LiveChat:
_setup_finished = False _setup_finished = False
#チャット監視中のListenerのリスト #チャット監視中のListenerのリスト
_listeners = [] _listeners = []
_logger = config.logger(__name__)
def __init__(self, video_id, def __init__(self, video_id,
seektime = 0, seektime = 0,

View File

@@ -13,12 +13,23 @@ headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation=" "get_live_chat_replay?continuation="
def _divide(start, end, count): def _split(start, end, count, min_interval = 120):
min_interval = 120 """
if (not isinstance(start,int) or Split section from `start` to `end` into `count` pieces,
not isinstance(end,int) or and returns the beginning of each piece.
not isinstance(count,int)): The `count` is adjusted so that the length of each piece
raise ValueError("start/end/count must be int") is no smaller than `min_interval`.
Returns:
--------
List of the beginning position of each piece.
"""
if not (isinstance(start,int) or isinstance(start,float)) or \
not (isinstance(end,int) or isinstance(end,float)):
raise ValueError("start/end must be int or float")
if not isinstance(count,int):
raise ValueError("count must be int")
if start>end: if start>end:
raise ValueError("end must be equal to or greater than start.") raise ValueError("end must be equal to or greater than start.")
if count<1: if count<1:
@@ -39,7 +50,7 @@ def ready_blocks(video_id, duration, div, callback):
async def _get_blocks( video_id, duration, div, callback): async def _get_blocks( video_id, duration, div, callback):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
futures = [_create_block(session, video_id, pos, seektime, callback) futures = [_create_block(session, video_id, pos, seektime, callback)
for pos, seektime in enumerate(_divide(-1, duration, div))] for pos, seektime in enumerate(_split(-1, duration, div))]
return await asyncio.gather(*futures,return_exceptions=True) return await asyncio.gather(*futures,return_exceptions=True)
async def _create_block(session, video_id, pos, seektime, callback): async def _create_block(session, video_id, pos, seektime, callback):

View File

@@ -3,24 +3,24 @@ class Block:
Parameter: Parameter:
--------- ---------
pos : int pos : int :
index of this block on block list. index of this block on block list.
first : int first : int :
videoOffsetTimeMs of chat_data[0] videoOffsetTimeMs of chat_data[0]
last : int last : int :
videoOffsetTimeMs of the last chat_data current read. videoOffsetTimeMs of the last chat_data.
(chat_data[-1]) (chat_data[-1])
this value increases as fetching chatdata progresses. this value increases as fetching chatdata progresses.
temp_last : int temp_last : int :
temporary videoOffsetTimeMs of last chat data, target videoOffsetTimeMs of last chat data for download,
equals to first videoOffsetTimeMs of next block. equals to first videoOffsetTimeMs of next block.
when download worker reaches this offset, the download will stop. when download worker reaches this offset, stop downloading.
continuation : str continuation : str :
continuation param of last chat data. continuation param of last chat data.
chat_data : List chat_data : List

View File

@@ -2,15 +2,15 @@ from . import parser
class DownloadWorker: class DownloadWorker:
""" """
DownloadWorker : associates a download session with a block. DownloadWorker associates a download session with a block.
Parameter Parameter
---------- ----------
fetch : func fetch :
download function of asyncdl download function of asyncdl
block : Block block :
chunk of chat_data Block object that includes chat_data
""" """
def __init__(self, fetch, block): def __init__(self, fetch, block):
self.block = block self.block = block

View File

@@ -55,8 +55,9 @@ class Downloader:
return ( return (
self.ready_blocks() self.ready_blocks()
.remove_duplicate_head() .remove_duplicate_head()
.set_temporary_last() # .set_temporary_last()
.remove_overwrap() .remove_overwrap()
.set_temporary_last()
.download_blocks() .download_blocks()
.remove_duplicate_tail() .remove_duplicate_tail()
.combine() .combine()

View File

@@ -52,7 +52,6 @@ def duplicate_head(blocks):
and and
type_0 == type_1 type_0 == type_1
) )
ret = [blocks[i] for i in range(len(blocks)-1) ret = [blocks[i] for i in range(len(blocks)-1)
if (len(blocks[i].chat_data)>0 and if (len(blocks[i].chat_data)>0 and
not is_duplicate_head(i) )] not is_duplicate_head(i) )]
@@ -75,9 +74,9 @@ def duplicate_tail(blocks):
type_0 == type_1 type_0 == type_1
) )
ret = [blocks[i] for i in range(0,len(blocks)-1) ret = [blocks[i] for i in range(0,len(blocks))
if i == 0 or not is_duplicate_tail(i) ] if i == 0 or not is_duplicate_tail(i) ]
ret.append(blocks[-1]) #ret.append(blocks[-1])
return ret return ret
def overwrap(blocks): def overwrap(blocks):
@@ -103,3 +102,9 @@ def overwrap(blocks):
b = a+1 b = a+1
ret.append(blocks[-1]) ret.append(blocks[-1])
return ret return ret
def _dump(blocks):
print(__name__)
print(f"---------- first last temp_last {'':>3}---")
for i,block in enumerate(blocks):
print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.temp_last:>10}")

77
tests/test_dl_asyncdl.py Normal file
View File

@@ -0,0 +1,77 @@
import aiohttp
import asyncio
import json
from pytchat.tool import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
from pytchat.tool import asyncdl
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def test_asyncdl_split(mocker):
    """Exercise asyncdl._split with valid and invalid arguments."""
    # Valid inputs: verify the computed beginning of each piece.
    assert asyncdl._split(0, 1000, 1) == [0]
    assert asyncdl._split(1000, 1000, 10) == [1000]
    assert asyncdl._split(0, 1000, 5) == [0, 200, 400, 600, 800]
    assert asyncdl._split(10.5, 700.3, 5) == [10, 148, 286, 424, 562]
    assert asyncdl._split(0, 500, 5) == [0, 125, 250, 375]
    # count is shrunk so that no piece is shorter than min_interval.
    assert asyncdl._split(0, 500, 500) == [0, 125, 250, 375]
    assert asyncdl._split(-1, 1000, 5) == [-1, 199, 399, 599, 799]

    # Invalid inputs: each argument tuple must raise ValueError.
    bad_args = [
        (500, 0, 5),        # invalid argument order (start > end)
        (0, 500, -1),       # invalid count (negative)
        (0, 500, 0),        # invalid count (zero)
        (0, 5000, 5.2),     # invalid argument type (count is float)
        (0, 5000, "test"),  # invalid argument type (count is str)
        ([0, 1], 5000, 5),  # invalid argument type (start is list)
    ]
    for start, end, count in bad_args:
        raised = False
        try:
            asyncdl._split(start, end, count)
        except ValueError:
            raised = True
        assert raised

113
tests/test_dl_duplcheck.py Normal file
View File

@@ -0,0 +1,113 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool import duplcheck
from pytchat.tool import parser
from pytchat.tool.block import Block
from pytchat.tool.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def test_overwrap(mocker):
    """
    test overwrap data
    operation : [0] , [1] -> discard [1]
    [0] , [2] , [3] -> discard [2]
    [3] , [4] , [5] -> discard [4]
    result : [0] , [3] , [5]
    """
    # (first, last) offset pairs; block position equals the list index.
    offsets = [
        (0, 38771),
        (9890, 38771),
        (20244, 45146),
        (32476, 60520),
        (41380, 62875),
        (52568, 62875),
    ]
    blocks = tuple(
        Block(pos, first, last, "", [])
        for pos, (first, last) in enumerate(offsets)
    )

    result = duplcheck.overwrap(blocks)

    assert len(result) == 3
    # Surviving blocks must match the expected originals by offset.
    for got, keep in zip(result, (0, 3, 5)):
        assert got.first == blocks[keep].first
        assert got.last == blocks[keep].last
def test_duplicate_head(mocker):
    """
    test duplicate head data
    operation : [0] , [1] -> discard [0]
    [1] , [2] -> discard [1]
    [2] , [3] -> append [2]
    [3] , [4] -> discard [3]
    [4] , [5] -> append [4]
    append [5]
    result : [0] , [3] , [5]
    """
    def load_chatdata(filename):
        # parse() returns (continuation, chat_data); keep the chat data.
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/head/" + filename))
        )[1]

    # (first, last) offset pairs; block position and fixture index match.
    offsets = [
        (0, 2500),
        (0, 38771),
        (0, 45146),
        (20244, 60520),
        (20244, 62875),
        (52568, 62875),
    ]
    blocks = tuple(
        Block(pos, first, last, "", load_chatdata(f"dp0-{pos}.json"))
        for pos, (first, last) in enumerate(offsets)
    )
    _dump(blocks)

    result = duplcheck.duplicate_head(blocks)

    assert len(result) == 3
    # Surviving blocks must match the expected originals by offset.
    for got, keep in zip(result, (2, 4, 5)):
        assert got.first == blocks[keep].first
        assert got.last == blocks[keep].last
def test_duplicate_tail(mocker):
    """
    test duplicate tail data
    operation : append [0]
    [0] , [1] -> discard [1]
    [1] , [2] -> append [2]
    [2] , [3] -> discard [3]
    [3] , [4] -> append [4]
    [4] , [5] -> discard [5]
    result : [0] , [2] , [4]
    """
    def load_chatdata(filename):
        # parse() returns (continuation, chat_data); keep the chat data.
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/head/" + filename))
        )[1]

    # (first, last) offset pairs; block position and fixture index match.
    offsets = [
        (0, 2500),
        (1500, 2500),
        (10000, 45146),
        (20244, 45146),
        (20244, 62875),
        (52568, 62875),
    ]
    blocks = tuple(
        Block(pos, first, last, "", load_chatdata(f"dp0-{pos}.json"))
        for pos, (first, last) in enumerate(offsets)
    )

    result = duplcheck.duplicate_tail(blocks)
    _dump(result)

    assert len(result) == 3
    # Surviving blocks must match the expected originals by offset.
    for got, keep in zip(result, (0, 2, 4)):
        assert got.first == blocks[keep].first
        assert got.last == blocks[keep].last

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff