This commit is contained in:
taizan-hokuto
2020-02-02 22:36:26 +09:00
parent e8510f1116
commit 24f08ecbdb
15 changed files with 19553 additions and 26 deletions

View File

@@ -73,7 +73,6 @@ class LiveChatAsync:
'''
_setup_finished = False
_logger = config.logger(__name__)
def __init__(self, video_id,
seektime = 0,

View File

@@ -74,7 +74,6 @@ class LiveChat:
_setup_finished = False
#list of Listeners that are currently monitoring the chat
_listeners = []
_logger = config.logger(__name__)
def __init__(self, video_id,
seektime = 0,

View File

@@ -13,12 +13,23 @@ headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
def _divide(start, end, count):
min_interval = 120
if (not isinstance(start,int) or
not isinstance(end,int) or
not isinstance(count,int)):
raise ValueError("start/end/count must be int")
def _split(start, end, count, min_interval = 120):
"""
Split the section from `start` to `end` into `count` pieces
and return the beginning of each piece.
`count` is adjusted so that the length of each piece
is no smaller than `min_interval`.
Returns:
--------
List of the beginning positions of the pieces.
"""
if not isinstance(start, (int, float)) or not isinstance(end, (int, float)):
raise ValueError("start/end must be int or float")
if not isinstance(count,int):
raise ValueError("count must be int")
if start>end:
raise ValueError("end must be equal to or greater than start.")
if count<1:
@@ -39,7 +50,7 @@ def ready_blocks(video_id, duration, div, callback):
async def _get_blocks( video_id, duration, div, callback):
async with aiohttp.ClientSession() as session:
futures = [_create_block(session, video_id, pos, seektime, callback)
for pos, seektime in enumerate(_divide(-1, duration, div))]
for pos, seektime in enumerate(_split(-1, duration, div))]
return await asyncio.gather(*futures,return_exceptions=True)
async def _create_block(session, video_id, pos, seektime, callback):
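For orientation, the splitting rule documented in `_split` above can be sketched roughly as follows. This is a hedged reconstruction for illustration only, not the committed body (which this excerpt truncates); it assumes the count is shrunk so every piece spans at least `min_interval`, that each starting offset is truncated to int, and it omits the argument validation shown in the first hunk:

def _split(start, end, count, min_interval=120):
    # shrink count so that every piece is at least min_interval long
    count = min(count, max(1, int((end - start) / min_interval)))
    interval = (end - start) / count
    # truncate each starting offset to int
    return [int(start + interval * i) for i in range(count)]

Under this sketch, _split(0, 500, 5) yields [0, 125, 250, 375] and _split(-1, 1000, 5) yields [-1, 199, 399, 599, 799], matching the expectations in test_asyncdl_split below.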

View File

@@ -3,24 +3,24 @@ class Block:
Parameter:
---------
pos : int
pos : int :
index of this block in the block list.
first : int
first : int :
videoOffsetTimeMs of chat_data[0]
last : int
videoOffsetTimeMs of the last chat_data current read.
last : int :
videoOffsetTimeMs of the last chat_data.
(chat_data[-1])
This value increases as the chat data fetch progresses.
temp_last : int
temporary videoOffsetTimeMs of last chat data,
temp_last : int :
target videoOffsetTimeMs of the last chat data to download;
equal to the first videoOffsetTimeMs of the next block.
when download worker reaches this offset, the download will stop.
When the download worker reaches this offset, downloading stops.
continuation : str
continuation : str :
continuation param of last chat data.
chat_data : List
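As a quick illustration of the fields documented above: the new tests added in this commit construct blocks positionally, which implies a constructor of the form Block(pos, first, last, continuation, chat_data). A minimal example based only on that usage:

from pytchat.tool.block import Block

# first/last are videoOffsetTimeMs values; continuation and chat_data start empty
block = Block(0, 0, 38771, "", [])
print(block.first, block.last)  # -> 0 38771

temp_last is not passed to the constructor here; judging from the Downloader chain below, it appears to be filled in later by set_temporary_last().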

View File

@@ -2,15 +2,15 @@ from . import parser
class DownloadWorker:
"""
DownloadWorker : associates a download session with a block.
DownloadWorker associates a download session with a block.
Parameter
----------
fetch : func
fetch :
download function of asyncdl
block : Block
chunk of chat_data
block :
Block object that includes chat_data
"""
def __init__(self, fetch, block):
self.block = block
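Since the constructor shown here simply stores its two arguments, a worker can be paired with a block and any awaitable download routine. The sketch below is purely illustrative; the fetch signature is hypothetical, and only the (fetch, block) parameters come from this hunk:

from pytchat.tool.block import Block

async def fake_fetch(continuation):
    # hypothetical stand-in for asyncdl's download function
    return {}

# DownloadWorker is the class defined above; its module path is not shown in this diff
worker = DownloadWorker(fetch=fake_fetch, block=Block(0, 0, 1000, "", []))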

View File

@@ -55,8 +55,9 @@ class Downloader:
return (
self.ready_blocks()
.remove_duplicate_head()
.set_temporary_last()
# .set_temporary_last()
.remove_overwrap()
.set_temporary_last()
.download_blocks()
.remove_duplicate_tail()
.combine()

View File

@@ -52,7 +52,6 @@ def duplicate_head(blocks):
and
type_0 == type_1
)
ret = [blocks[i] for i in range(len(blocks)-1)
if (len(blocks[i].chat_data)>0 and
not is_duplicate_head(i) )]
@@ -75,9 +74,9 @@ def duplicate_tail(blocks):
type_0 == type_1
)
ret = [blocks[i] for i in range(0,len(blocks)-1)
ret = [blocks[i] for i in range(0,len(blocks))
if i == 0 or not is_duplicate_tail(i) ]
ret.append(blocks[-1])
#ret.append(blocks[-1])
return ret
def overwrap(blocks):
@@ -103,3 +102,9 @@ def overwrap(blocks):
b = a+1
ret.append(blocks[-1])
return ret
def _dump(blocks):
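# debug helper: print first / last / temp_last offsets for each block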
print(__name__)
print(f"---------- first last temp_last {'':>3}---")
for i,block in enumerate(blocks):
print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.temp_last:>10}")

77
tests/test_dl_asyncdl.py Normal file
View File

@@ -0,0 +1,77 @@
import aiohttp
import asyncio
import json
from pytchat.tool import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
from pytchat.tool import asyncdl
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def test_asyncdl_split(mocker):
ret = asyncdl._split(0,1000,1)
assert ret == [0]
ret = asyncdl._split(1000,1000,10)
assert ret == [1000]
ret = asyncdl._split(0,1000,5)
assert ret == [0,200,400,600,800]
ret = asyncdl._split(10.5, 700.3, 5)
assert ret == [10, 148, 286, 424, 562]
ret = asyncdl._split(0,500,5)
assert ret == [0,125,250,375]
ret = asyncdl._split(0,500,500)
assert ret == [0,125,250,375]
ret = asyncdl._split(-1,1000,5)
assert ret == [-1, 199, 399, 599, 799]
"""invalid argument order"""
try:
ret = asyncdl._split(500,0,5)
assert False
except ValueError:
assert True
"""invalid count"""
try:
ret = asyncdl._split(0,500,-1)
assert False
except ValueError:
assert True
try:
ret = asyncdl._split(0,500,0)
assert False
except ValueError:
assert True
"""invalid argument type"""
try:
ret = asyncdl._split(0,5000,5.2)
assert False
except ValueError:
assert True
try:
ret = asyncdl._split(0,5000,"test")
assert False
except ValueError:
assert True
try:
ret = asyncdl._split([0,1],5000,5)
assert False
except ValueError:
assert True

113
tests/test_dl_duplcheck.py Normal file
View File

@@ -0,0 +1,113 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool import duplcheck
from pytchat.tool import parser
from pytchat.tool.block import Block
from pytchat.tool.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def test_overwrap(mocker):
"""
test overwrap data
operation : [0] , [1] -> discard [1]
[0] , [2] , [3] -> discard [2]
[3] , [4] , [5] -> discard [4]
result : [0] , [3] , [5]
"""
blocks = (
Block(0, 0, 38771, "",[]),
Block(1, 9890, 38771, "",[]),
Block(2, 20244, 45146, "",[]),
Block(3, 32476, 60520, "",[]),
Block(4, 41380, 62875, "",[]),
Block(5, 52568, 62875, "",[])
)
result = duplcheck.overwrap(blocks)
assert len(result) == 3
assert result[0].first == blocks[0].first
assert result[0].last == blocks[0].last
assert result[1].first == blocks[3].first
assert result[1].last == blocks[3].last
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
def test_duplicate_head(mocker):
"""
test duplicate head data
operation : [0] , [1] -> discard [0]
[1] , [2] -> discard [1]
[2] , [3] -> append [2]
[3] , [4] -> discard [3]
[4] , [5] -> append [4]
append [5]
result : [0] , [3] , [5]
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
)[1]
blocks = (
Block(0, 0, 2500, "",load_chatdata("dp0-0.json")),
Block(1, 0, 38771, "",load_chatdata("dp0-1.json")),
Block(2, 0, 45146, "",load_chatdata("dp0-2.json")),
Block(3, 20244, 60520, "",load_chatdata("dp0-3.json")),
Block(4, 20244, 62875, "",load_chatdata("dp0-4.json")),
Block(5, 52568, 62875, "",load_chatdata("dp0-5.json"))
)
_dump(blocks)
result = duplcheck.duplicate_head(blocks)
assert len(result) == 3
assert result[0].first == blocks[2].first
assert result[0].last == blocks[2].last
assert result[1].first == blocks[4].first
assert result[1].last == blocks[4].last
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
def test_duplicate_tail(mocker):
"""
test duplicate tail data
operation : append [0]
[0] , [1] -> discard [1]
[1] , [2] -> append [2]
[2] , [3] -> discard [3]
[3] , [4] -> append [4]
[4] , [5] -> discard [5]
result : [0] , [2] , [4]
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
)[1]
blocks = (
Block(0, 0, 2500, "",load_chatdata("dp0-0.json")),
Block(1, 1500, 2500, "",load_chatdata("dp0-1.json")),
Block(2, 10000, 45146, "",load_chatdata("dp0-2.json")),
Block(3, 20244, 45146, "",load_chatdata("dp0-3.json")),
Block(4, 20244, 62875, "",load_chatdata("dp0-4.json")),
Block(5, 52568, 62875, "",load_chatdata("dp0-5.json"))
)
result = duplcheck.duplicate_tail(blocks)
_dump(result)
assert len(result) == 3
assert result[0].first == blocks[0].first
assert result[0].last == blocks[0].last
assert result[1].first == blocks[2].first
assert result[1].last == blocks[2].last
assert result[2].first == blocks[4].first
assert result[2].last == blocks[4].last

6 file diffs suppressed because they are too large