Aggregate return values with Patch class
@@ -1,153 +0,0 @@
-from . import parser
-from .. paramgen import arcparam
-from . block import Block
-
-class DownloadWorker:
-    """
-    DownloadWorker associates a download session with a block.
-
-    Parameter
-    ----------
-    fetch : func :
-        download function of asyncdl
-
-    block : Block :
-        Block object associated with this worker
-
-    blocks : list :
-        List of Block(s)
-
-    video_id : str :
-
-    source_block : Block :
-        the Block from which current downloading block is splitted
-    """
-    __slots__ = ['fetch', 'block', 'blocks', 'video_id', 'source_block']
-
-    def __init__(self, fetch, block, blocks, video_id ):
-        self.block = block
-        self.fetch = fetch
-        self.blocks = blocks
-        self.video_id = video_id
-        self.source_block = None
-
-    async def run(self, session):
-        """Remove extra chats just after ready_blocks(). """
-        continuation = initial_fill(self.block)
-        """download loop """
-        while continuation:
-            chats, new_cont, fetched_first, fetched_last = await self.fetch(
-                continuation, session)
-            if fetched_first is None:
-                break
-            if self.source_block:
-                continuation = split_fill(
-                    self.source_block, self.block, chats, new_cont,
-                    fetched_first, fetched_last)
-                self.source_block = None
-            else:
-                continuation = fill(self.block, chats, new_cont, fetched_last)
-
-            if continuation is None:
-                new_block = get_new_block(self)
-                self.block = new_block
-                continuation = new_block.continuation
-
-def get_new_block(worker) -> Block:
-    worker.block.done = True
-    index, undone_block = search_undone_block(worker.blocks)
-    if undone_block is None:
-        return Block(continuation = None)
-    mean = (undone_block.end + undone_block.last)/2
-    continuation = arcparam.getparam(worker.video_id, seektime = mean/1000)
-    worker.source_block = undone_block
-    worker.source_block.splitting = True
-    new_block = Block(
-        end = undone_block.end,
-        chat_data = [],
-        continuation = continuation,
-        splitting = True,
-        is_last = worker.source_block.is_last)
-    worker.blocks.insert(index+1, new_block)
-    return new_block
-
-def search_undone_block(blocks) -> (int, Block):
-    """
-    Returns
-    --------
-    ret_index : int :
-        index of Block download not completed in blocks.
-
-    ret_block : Block :
-        Block download not completed.
-    """
-    max_remaining = 0
-    ret_block = None
-    ret_index = 0
-    for index, block in enumerate(blocks):
-        if block.done or block.splitting:
-            continue
-        remaining = block.remaining
-        if remaining > max_remaining and remaining > 120000:
-            ret_index = index
-            ret_block = block
-            max_remaining = remaining
-    return ret_index, ret_block
-
-def top_cut(chats, last) -> list:
-    for i, chat in enumerate(chats):
-        if parser.get_offset(chat) > last:
-            return chats[i:]
-    return []
-
-def bottom_cut(chats, last) -> list:
-    for rchat in reversed(chats):
-        if parser.get_offset(rchat) >= last:
-            chats.pop()
-        else:
-            break
-    return chats
-
-def split_fill(source_block, block, chats, new_cont,
-               fetched_first, fetched_last):
-    if fetched_last <= source_block.last:
-        return None
-    block.splitting = False
-    source_block.splitting = False
-    source_block.end = fetched_first
-    block.first = fetched_first
-    block.last = fetched_last
-    continuation = new_cont
-    if fetched_first < source_block.last:
-        chats = top_cut(chats, source_block.last)
-        block.first = source_block.last
-    if block.end < fetched_last:
-        chats = bottom_cut(chats, block.end)
-        block.last = block.end
-        continuation = None
-    block.chat_data.extend(chats)
-    block.continuation = continuation
-    return continuation
-
-def initial_fill(block):
-    chats, cont = get_chats(block, block.chat_data, block.continuation, block.last)
-    block.chat_data = chats
-    return cont
-
-def fill(block, chats, cont, fetched_last):
-    chats, cont = get_chats(block, chats, cont, fetched_last)
-    block.chat_data.extend(chats)
-    return cont
-
-def get_chats(block, chats, cont, fetched_last):
-    block.last = fetched_last
-    if fetched_last < block.end or block.is_last:
-        block.last = fetched_last
-        block.remaining = block.end - block.last
-        return chats, cont
-    for i, line in enumerate(chats):
-        line_offset = parser.get_offset(line)
-        if line_offset >= block.end:
-            block.last = line_offset
-            block.remaining = 0
-            block.done = True
-            return chats[:i], None
pytchat/tool/download/__init__.py (new empty file)
@@ -5,16 +5,17 @@ import json
 from . import parser
 from . block import Block
 from . dlworker import DownloadWorker
-from .. paramgen import arcparam
-from .. import config
-from urllib.parse import quote
+from . patch import Patch
+from ... import config
+from ... paramgen import arcparam
 from concurrent.futures import CancelledError
+from urllib.parse import quote

 headers = config.headers
 REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
     "get_live_chat_replay?continuation="

-def _split(start, end, count, min_interval = 120):
+def _split(start, end, count, min_interval_sec = 120):
     """
     Split section from `start` to `end` into `count` pieces,
     and returns the beginning of each piece.
@@ -23,7 +24,7 @@ def _split(start, end, count, min_interval = 120):

     Returns:
     --------
-    List of the beginning position of each piece.
+    List of the offset of each block's first chat data.
     """

     if not (isinstance(start,int) or isinstance(start,float)) or \
@@ -35,14 +36,14 @@ def _split(start, end, count, min_interval = 120):
         raise ValueError("end must be equal to or greater than start.")
     if count<1:
         raise ValueError("count must be equal to or greater than 1.")
-    if (end-start)/count < min_interval:
-        count = int((end-start)/min_interval)
+    if (end-start)/count < min_interval_sec:
+        count = int((end-start)/min_interval_sec)
     if count == 0 : count = 1
     interval = (end-start)/count

     if count == 1:
         return [start]
-    return sorted(list(set([int(start+interval*j)
-                            for j in range(count) ])))
+    return sorted( list(set( [int(start + interval*j)
+                              for j in range(count) ])))

 def ready_blocks(video_id, duration, div, callback):
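A quick standalone sketch of the renamed min_interval_sec guard, for orientation; this re-implements the body above without the type checks, so it is illustrative rather than the library's exact code:

    def _split(start, end, count, min_interval_sec=120):
        # use fewer pieces whenever each piece would span less than min_interval_sec
        if (end - start) / count < min_interval_sec:
            count = int((end - start) / min_interval_sec) or 1
        interval = (end - start) / count
        if count == 1:
            return [start]
        return sorted(set(int(start + interval * j) for j in range(count)))

    print(_split(-1, 3600, 10))  # ten seek times spread over a one-hour stream
    print(_split(0, 300, 10))    # [0, 150] -> capped, since 300s / 10 < 120s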
@@ -50,27 +51,16 @@ def ready_blocks(video_id, duration, div, callback):

     async def _get_blocks( video_id, duration, div, callback):
         async with aiohttp.ClientSession() as session:
-            tasks = [_create_block(session, video_id, pos, seektime, callback)
-                for pos, seektime in enumerate(_split(-1, duration, div))]
+            tasks = [_create_block(session, video_id, seektime, callback)
+                for seektime in _split(-1, duration, div)]
             return await asyncio.gather(*tasks)

-    async def _create_block(session, video_id, pos, seektime, callback):
-        continuation = arcparam.getparam(
-            video_id, seektime = seektime)
+    async def _create_block(session, video_id, seektime, callback):
+        continuation = arcparam.getparam(video_id, seektime = seektime)

         url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
-        for _ in range(3):
-            try:
-                async with session.get(url, headers = headers) as resp:
-                    text = await resp.text()
-                next_continuation, actions = parser.parse(json.loads(text))
-            except json.JSONDecodeError:
-                print("JSONDecodeError occured")
-                await asyncio.sleep(1)
-                continue
-            break
-        else:
-            raise json.JSONDecodeError
+        async with session.get(url, headers = headers) as resp:
+            text = await resp.text()
+        next_continuation, actions = parser.parse(json.loads(text))
         if actions:
             first = parser.get_offset(actions[0])
             last = parser.get_offset(actions[-1])
@@ -82,59 +72,50 @@ def ready_blocks(video_id, duration, div, callback):
                 first = first,
                 last = last
             )

+    """
+    fetch initial blocks.
+    """
     loop = asyncio.get_event_loop()
-    result = loop.run_until_complete(
+    blocks = loop.run_until_complete(
         _get_blocks(video_id, duration, div, callback))
-    return result
+    return blocks

-def download_chunk(callback, blocks, video_id):
+def download_patch(callback, blocks, video_id):

     async def _allocate_workers():
         workers = [
             DownloadWorker(
-                fetch = _fetch,
-                block = block,
-                blocks = blocks,
-                video_id = video_id
+                fetch = _fetch, block = block,
+                blocks = blocks, video_id = video_id
             )
-            for i,block in enumerate(blocks)
+            for block in blocks
         ]
         async with aiohttp.ClientSession() as session:
             tasks = [worker.run(session) for worker in workers]
             return await asyncio.gather(*tasks)

-    async def _fetch(continuation,session):
+    async def _fetch(continuation,session) -> Patch:
         url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
-        for _ in range(3):
-            try:
-                async with session.get(url,headers = config.headers) as resp:
-                    chat_json = await resp.text()
-            except json.JSONDecodeError:
-                print("JSONDecodeError occured")
-                await asyncio.sleep(1)
-                continue
-            break
-        else:
-            raise json.JSONDecodeError
+        async with session.get(url,headers = config.headers) as resp:
+            chat_json = await resp.text()
         continuation, actions = parser.parse(json.loads(chat_json))
         if actions:
             last = parser.get_offset(actions[-1])
             first = parser.get_offset(actions[0])
             if callback:
                 callback(actions, last - first)
-            return actions, continuation, first, last
-        return [], continuation, None, None
+            return Patch(actions, continuation, first, last)
+        return Patch()

+    """
+    allocate workers and assign blocks.
+    """
     loop = asyncio.get_event_loop()
     try:
         loop.run_until_complete(_allocate_workers())
     except CancelledError:
         pass

-async def shutdown():
+async def _shutdown():
     print("\nshutdown...")
     tasks = [t for t in asyncio.all_tasks()
             if t is not asyncio.current_task()]
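The hunk above is the heart of this commit: _fetch now aggregates its four return values into a single Patch. A self-contained before/after sketch (plain ints stand in for chat actions and offsets; the real code extracts offsets with parser.get_offset):

    from typing import NamedTuple

    class Patch(NamedTuple):
        chats: list = []
        continuation: str = None
        first: int = None
        last: int = None

    def fetch_old(actions, cont):      # before: callers unpack a positional 4-tuple
        if actions:
            return actions, cont, actions[0], actions[-1]
        return [], cont, None, None

    def fetch_new(actions, cont):      # after: one named value with safe defaults
        if actions:
            return Patch(actions, cont, actions[0], actions[-1])
        return Patch()

    print(fetch_old([], 'c'))  # ([], 'c', None, None)
    print(fetch_new([], 'c'))  # Patch(chats=[], continuation=None, first=None, last=None)

Note that the empty case returns Patch(), whose default continuation is None; DownloadWorker.run (below) treats that as the signal to stop.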
@@ -147,5 +128,5 @@ async def shutdown():

 def cancel():
     loop = asyncio.get_event_loop()
-    loop.create_task(shutdown())
+    loop.create_task(_shutdown())
@@ -1,12 +1,10 @@
 from . import parser

 class Block:
-    """Block object represents virtual chunk of chatdata.
+    """Block object represents something like a box
+    to join chunks of chatdata.

     Parameter:
     ---------
-    pos : int :
-        index of this block on block list.
-
     first : int :
         videoOffsetTimeMs of the first chat_data
         (chat_data[0])
@@ -37,23 +35,23 @@ class Block:
     is_last : bool :
         whether this block is the last one in blocklist.

-    splitting : bool :
-        whether this block is in the process of splitting.
+    during_split : bool :
+        whether this block is in the process of being split.
         while True, this block is excluded from duplicate split procedure.
     """

     __slots__ = ['first','last','end','continuation','chat_data','remaining',
-                 'done','is_last','splitting']
+                 'done','is_last','during_split']

     def __init__(self, first = 0, last = 0, end = 0,
                  continuation = '', chat_data = [], is_last = False,
-                 splitting = False):
+                 during_split = False):
         self.first = first
         self.last = last
         self.end = end
         self.continuation = continuation
         self.chat_data = chat_data
         self.done = False
-        self.remaining = self.end- self.last
+        self.remaining = self.end - self.last
         self.is_last = is_last
-        self.splitting = splitting
+        self.during_split = during_split
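A minimal sketch of the bookkeeping this docstring describes, assuming the Block class above (values illustrative):

    block = Block(first=0, last=4000, end=60000, continuation='token', chat_data=[])
    print(block.remaining)   # 56000 -> end - last, milliseconds still to fetch
    block.last = 60000       # download reached the end of the section
    block.remaining = block.end - block.last
    print(block.remaining)   # 0 -> nothing left to download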
pytchat/tool/download/dlworker.py (new file, 87 lines)
@@ -0,0 +1,87 @@
+from . import parser
+from . block import Block
+from . patch import Patch, fill, split
+from ... paramgen import arcparam
+
+class DownloadWorker:
+    """
+    DownloadWorker associates a download session with a block.
+
+    When the dlworker finishes downloading, the block
+    being downloaded is split and assigned to the free dlworker.
+
+    Parameter
+    ----------
+    fetch : func :
+        download function of asyncdl
+
+    block : Block :
+        Block object that includes chat_data
+
+    blocks : list :
+        List of Block(s)
+
+    video_id : str :
+
+    parent_block : Block :
+        the block from which the current block is split
+    """
+    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']
+
+    def __init__(self, fetch, block, blocks, video_id ):
+        self.block = block
+        self.fetch = fetch
+        self.blocks = blocks
+        self.video_id = video_id
+        self.parent_block = None
+
+    async def run(self, session):
+        while self.block.continuation:
+            patch = await self.fetch(
+                self.block.continuation, session)
+            if patch.continuation is None:
+                """TODO : make the dlworker assigned to the last block
+                work more than twice as possible.
+                """
+                break
+            if self.parent_block:
+                split(self.parent_block, self.block, patch)
+                self.parent_block = None
+            else:
+                fill(self.block, patch)
+            if self.block.continuation is None:
+                """finished downloading this block """
+                self.block.done = True
+                self.block = _search_new_block(self)
+
+def _search_new_block(worker) -> Block:
+    index, undone_block = _get_undone_block(worker.blocks)
+    if undone_block is None:
+        return Block(continuation = None)
+    mean = (undone_block.last + undone_block.end)/2
+    continuation = arcparam.getparam(worker.video_id, seektime = mean/1000)
+    worker.parent_block = undone_block
+    worker.parent_block.during_split = True
+    new_block = Block(
+        end = undone_block.end,
+        chat_data = [],
+        continuation = continuation,
+        during_split = True,
+        is_last = worker.parent_block.is_last)
+    worker.blocks.insert(index+1, new_block)
+    return new_block
+
+def _get_undone_block(blocks) -> (int, Block):
+    min_interval_ms = 120000
+    max_remaining = 0
+    undone_block = None
+    index_undone_block = 0
+    for index, block in enumerate(blocks):
+        if block.done or block.during_split:
+            continue
+        remaining = block.remaining
+        if remaining > max_remaining and remaining > min_interval_ms:
+            index_undone_block = index
+            undone_block = block
+            max_remaining = remaining
+    return index_undone_block, undone_block
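The selection rule in _get_undone_block reduces to a max over eligible blocks; a self-contained sketch with a stand-in class (field names follow the diff, and 120000 ms is the same threshold as min_interval_ms):

    class B:  # stand-in carrying only the fields the rule reads
        def __init__(self, remaining, done=False, during_split=False):
            self.remaining, self.done, self.during_split = remaining, done, during_split

    blocks = [B(50000), B(300000), B(500000, during_split=True), B(200000)]
    eligible = [b for b in blocks
                if not (b.done or b.during_split) and b.remaining > 120000]
    best = max(eligible, key=lambda b: b.remaining, default=None)
    print(blocks.index(best) if best else None)
    # 1 -> the 500000 ms block is mid-split and 50000 ms is under the threshold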
@@ -1,9 +1,9 @@
 from . import asyncdl
+from . import duplcheck
 from . import parser
-from . duplcheck import duplicate_head, duplicate_tail, overwrap
-from . videoinfo import VideoInfo
-from .. import config
-from .. exceptions import InvalidVideoIdException
+from .. videoinfo import VideoInfo
+from ... import config
+from ... exceptions import InvalidVideoIdException

 logger = config.logger(__name__)
 headers=config.headers
@@ -22,36 +22,36 @@ class Downloader:
         self.callback = callback
         self.blocks = []

-    def ready_blocks(self):
-        result = asyncdl.ready_blocks(
+    def _ready_blocks(self):
+        blocks = asyncdl.ready_blocks(
             self.video_id, self.duration, self.div, self.callback)
-        self.blocks = [block for block in result if block]
+        self.blocks = [block for block in blocks if block]
         return self

-    def remove_duplicate_head(self):
-        self.blocks = duplicate_head(self.blocks)
+    def _remove_duplicate_head(self):
+        self.blocks = duplcheck.remove_duplicate_head(self.blocks)
         return self

-    def set_temporary_last(self):
+    def _set_block_end(self):
         for i in range(len(self.blocks)-1):
             self.blocks[i].end = self.blocks[i+1].first
         self.blocks[-1].end = self.duration*1000
         self.blocks[-1].is_last = True
         return self

-    def remove_overwrap(self):
-        self.blocks = overwrap(self.blocks)
+    def _remove_overlap(self):
+        self.blocks = duplcheck.remove_overlap(self.blocks)
         return self

-    def download_blocks(self):
-        asyncdl.download_chunk(self.callback, self.blocks, self.video_id)
+    def _download_blocks(self):
+        asyncdl.download_patch(self.callback, self.blocks, self.video_id)
         return self

-    def remove_duplicate_tail(self):
-        self.blocks = duplicate_tail(self.blocks)
+    def _remove_duplicate_tail(self):
+        self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
         return self

-    def combine(self):
+    def _combine(self):
         ret = []
         for block in self.blocks:
             ret.extend(block.chat_data)
@@ -59,13 +59,13 @@ class Downloader:

     def download(self):
         return (
-            self.ready_blocks()
-                .remove_duplicate_head()
-                .remove_overwrap()
-                .set_temporary_last()
-                .download_blocks()
-                .remove_duplicate_tail()
-                .combine()
+            self._ready_blocks()
+                ._remove_duplicate_head()
+                ._set_block_end()
+                ._remove_overlap()
+                ._download_blocks()
+                ._remove_duplicate_tail()
+                ._combine()
         )

 def download(video_id, div = 1, callback = None, processor = None):
@@ -20,18 +20,18 @@ def check_duplicate(chatdata):
             and
             tbl_type[i] == tbl_type[j]
         )

     print("creating table...")
     create_table(chatdata,max_range)
     print("searching duplicate data...")

     return [{ "i":{
                 "index" : i, "id" : parser.get_id(chatdata[i]),
-                "offsetTime" : parser.get_offset(chatdata[i])
+                "offsetTime" : parser.get_offset(chatdata[i]),
+                "type" : parser.get_type(chatdata[i])
               },
               "j":{
                 "index" : j, "id" : parser.get_id(chatdata[j]),
-                "offsetTime" : parser.get_offset(chatdata[j])
+                "offsetTime" : parser.get_offset(chatdata[j]),
+                "type" : parser.get_type(chatdata[j])
               }
             }
             for i in range(max_range) for j in range(i+1,max_range)
@@ -59,18 +59,17 @@ def check_duplicate_offset(chatdata):

     print("creating table...")
     create_table(chatdata,max_range)
-    print("searching duplicate offset data...")
+    print("searching duplicate data...")

     return [{
             "index" : i, "id" : tbl_id[i],
             "offsetTime" : tbl_offset[i],
             "type:" : tbl_type[i]
         }
         for i in range(max_range-1)
         if is_duplicate(i,i+1)]

-def duplicate_head(blocks):
+def remove_duplicate_head(blocks):
     if len(blocks) == 1 : return blocks

     def is_duplicate_head(index):
@@ -97,16 +96,14 @@ def duplicate_head(blocks):
     ret.append(blocks[-1])
     return ret

-def duplicate_tail(blocks):
+def remove_duplicate_tail(blocks):
     if len(blocks) == 1 : return blocks

     def is_duplicate_tail(index):

         if len(blocks[index].chat_data) == 0:
             return True
         elif len(blocks[index-1].chat_data) == 0:
             return False

         id_0 = parser.get_id(blocks[index-1].chat_data[-1])
         id_1 = parser.get_id(blocks[index].chat_data[-1])
         type_0 = parser.get_type(blocks[index-1].chat_data[-1])
@@ -123,32 +120,34 @@ def duplicate_tail(blocks):
         if i == 0 or not is_duplicate_tail(i) ]
     return ret

-def overwrap(blocks):
+def remove_overlap(blocks):
+    """
+    Fix overlapped blocks after ready_blocks().
+    Align the last offset of each block to the first offset
+    of next block (equals `end` offset of each block).
+    """
     if len(blocks) == 1 : return blocks

-    ret = []
-    a = 0
-    b = 1
-    jmp = False
-    ret.append(blocks[0])
-    while a < len(blocks)-2:
-        while blocks[a].last > blocks[b].first:
-            b += 1
-            if b == len(blocks)-1:
-                jmp = True
-                break
-        if jmp: break
-        if b-a == 1:
-            a = b
-        else:
-            a = b-1
-            ret.append(blocks[a])
-        b = a+1
-    ret.append(blocks[-1])
-    return ret
+    for block in blocks:
+        if block.is_last:
+            break
+        if len(block.chat_data) == 0:
+            continue
+        block_end = block.end
+        if block.last >= block_end:
+            for line in reversed(block.chat_data):
+                if parser.get_offset(line) < block_end:
+                    break
+                block.chat_data.pop()
+            block.last = parser.get_offset(line)
+            block.remaining = 0
+            block.done = True
+            block.continuation = None
+    return blocks

 def _dump(blocks):
-    print(__name__)
-    print(f"---------- first last end {'':>3}---")
+    print(f"---------- first last end---")
     for i,block in enumerate(blocks):
         print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")
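The trimming inside remove_overlap comes down to dropping trailing chat lines at or beyond block.end; a self-contained sketch with ints standing in for chat lines (the real code reads offsets via parser.get_offset; the numbers mirror test_overlap further down):

    chat_offsets = [9500, 9779, 9890, 9950]   # trailing chats spill past block.end
    block_end = 9890                          # == the next block's first offset
    while chat_offsets and chat_offsets[-1] >= block_end:
        chat_offsets.pop()                    # drop lines at or beyond the boundary
    print(chat_offsets[-1])                   # 9779 -> becomes block.last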
@@ -1,6 +1,6 @@
 import json
-from .. import config
-from .. exceptions import (
+from ... import config
+from ... exceptions import (
     ResponseContextError,
     NoContentsException,
     NoContinuationsException )
@@ -23,15 +23,15 @@ def parse(jsn):
     if jsn is None:
         raise ValueError("parameter JSON is None")
     if jsn['response']['responseContext'].get('errors'):
-        raise ResponseContextError('動画に接続できません。'
-            '動画IDが間違っているか、動画が削除/非公開の可能性があります。')
+        raise ResponseContextError(
+            'video_id is invalid or private/deleted.')
     contents=jsn['response'].get('continuationContents')
     if contents is None:
-        raise NoContentsException('チャットデータを取得できませんでした。')
+        raise NoContentsException('No chat data.')

     cont = contents['liveChatContinuation']['continuations'][0]
     if cont is None:
-        raise NoContinuationsException('Continuationがありません。')
+        raise NoContinuationsException('No Continuation')
     metadata = cont.get('liveChatReplayContinuationData')
     if metadata:
         continuation = metadata.get("continuation")
pytchat/tool/download/patch.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+from . import parser
+from . block import Block
+from typing import NamedTuple
+
+class Patch(NamedTuple):
+    """
+    Patch represents chunk of chat data
+    which is fetched by asyncdl.download_patch._fetch().
+    """
+    chats : list = []
+    continuation : str = None
+    first : int = None
+    last : int = None
+
+def fill(block:Block, patch:Patch):
+    block_end = block.end
+    if patch.last < block_end or block.is_last:
+        set_patch(block, patch)
+        return
+    for line in reversed(patch.chats):
+        line_offset = parser.get_offset(line)
+        if line_offset < block_end:
+            break
+        patch.chats.pop()
+
+    set_patch(block, patch._replace(
+        continuation = None,
+        last = line_offset
+        )
+    )
+    block.remaining = 0
+    block.done = True
+
+def split(parent_block:Block, child_block:Block, patch:Patch):
+    parent_block.during_split = False
+    """patch overlaps with parent_block"""
+    if patch.first <= parent_block.last:
+        child_block.continuation = None
+        ''' Leave child_block.during_split == True
+        to exclude from during_split sequence.'''
+        return
+    child_block.during_split = False
+    child_block.first = patch.first
+    parent_block.end = patch.first
+    fill(child_block, patch)
+
+def set_patch(block:Block, patch:Patch):
+    block.continuation = patch.continuation
+    block.chat_data.extend(patch.chats)
+    block.last = patch.last
+    block.remaining = block.end - block.last
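fill() leans on NamedTuple._replace, which returns a modified copy instead of mutating in place; a self-contained sketch with toy values:

    from typing import NamedTuple

    class Patch(NamedTuple):
        chats: list = []
        continuation: str = None
        first: int = None
        last: int = None

    p = Patch(chats=[10, 20, 30], continuation='token', first=10, last=30)
    trimmed = p._replace(continuation=None, last=20)  # copy with two fields swapped
    print(trimmed.continuation, trimmed.last)  # None 20
    print(p.last)                              # 30 -> the original is untouched

_replace is the standard collections.namedtuple / typing.NamedTuple API, so the Patch class above gets it for free.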
@@ -1,19 +1,19 @@
 import aiohttp
 import asyncio
 import json
-from pytchat.tool import parser
+from pytchat.tool.download import parser
 import sys
 import time
 from aioresponses import aioresponses
 from concurrent.futures import CancelledError
-from pytchat.tool import asyncdl
+from pytchat.tool.download import asyncdl

 def _open_file(path):
     with open(path,mode ='r',encoding = 'utf-8') as f:
         return f.read()


-def test_asyncdl_split(mocker):
+def test_asyncdl_split():

     ret = asyncdl._split(0,1000,1)
     assert ret == [0]
@@ -3,47 +3,60 @@ import asyncio
 import json
 import os, sys
 import time
-from aioresponses import aioresponses
-from pytchat.tool import duplcheck
-from pytchat.tool import parser
-from pytchat.tool.block import Block
-from pytchat.tool.duplcheck import _dump
+from pytchat.tool.download import duplcheck
+from pytchat.tool.download import parser
+from pytchat.tool.download.block import Block
+from pytchat.tool.download.duplcheck import _dump

 def _open_file(path):
     with open(path,mode ='r',encoding = 'utf-8') as f:
         return f.read()

-def load_chatdata(filename):
-    return parser.parse(
-        json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
-    )[1]
-
-def test_overwrap(mocker):
-    """
-    test overwrap data
-    operation : [0] , [1] -> discard [1]
-                [0] , [2] , [3] -> discard [2]
-                [3] , [4] , [5] -> discard [4]
-    result    : [0] , [3] , [5]
-    """
-    blocks = (
-        Block(first = 0,     last = 38771, chat_data = load_chatdata("dp0-0.json")),
-        Block(first = 9890,  last = 38771, chat_data = load_chatdata("dp0-1.json")),
-        Block(first = 20244, last = 45146, chat_data = load_chatdata("dp0-2.json")),
-        Block(first = 32476, last = 60520, chat_data = load_chatdata("dp0-3.json")),
-        Block(first = 41380, last = 62875, chat_data = load_chatdata("dp0-4.json")),
-        Block(first = 52568, last = 62875, chat_data = load_chatdata("dp0-5.json"))
-    )
-    result = duplcheck.overwrap(blocks)
-    assert len(result) == 3
-    assert result[0].first == blocks[0].first
-    assert result[0].last  == blocks[0].last
-    assert result[1].first == blocks[3].first
-    assert result[1].last  == blocks[3].last
-    assert result[2].first == blocks[5].first
-    assert result[2].last  == blocks[5].last
-
-def test_duplicate_head(mocker):
+def test_overlap():
+    """
+    test overlap data
+    operation : [0] [2] [3] [4] -> last : align to end
+                [1] , [5]       -> no change
+    """
+    def load_chatdata(filename):
+        return parser.parse(
+            json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
+        )[1]
+
+    blocks = (
+        Block(first = 0,     last = 12771, end = 9890,  chat_data = load_chatdata("dp0-0.json")),
+        Block(first = 9890,  last = 15800, end = 20244, chat_data = load_chatdata("dp0-1.json")),
+        Block(first = 20244, last = 45146, end = 32476, chat_data = load_chatdata("dp0-2.json")),
+        Block(first = 32476, last = 50520, end = 41380, chat_data = load_chatdata("dp0-3.json")),
+        Block(first = 41380, last = 62875, end = 52568, chat_data = load_chatdata("dp0-4.json")),
+        Block(first = 52568, last = 62875, end = 54000, chat_data = load_chatdata("dp0-5.json"), is_last = True)
+    )
+    result = duplcheck.remove_overlap(blocks)
+
+    # dp0-0.json has an item whose offset time is 9890 (equal to block[0].end = block[1].first),
+    # but `last` must be aligned to the closest smaller value: 9779.
+    assert result[0].last == 9779
+    assert result[1].last == 15800
+    assert result[2].last == 32196
+    assert result[3].last == 41116
+    assert result[4].last == 52384
+    # the last block must always be added to the result.
+    assert result[5].last == 62875
+
+def test_duplicate_head():
+    def load_chatdata(filename):
+        return parser.parse(
+            json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
+        )[1]
     """
     test duplicate head data
     operation : [0] , [1] -> discard [0]
@@ -56,7 +69,7 @@ def test_duplicate_head(mocker):
     result : [0] , [3] , [5]
     """

+    # chat data offsets are ignored.
     blocks = (
         Block(first = 0, last = 2500, chat_data = load_chatdata("dp0-0.json")),
         Block(first = 0, last = 38771, chat_data = load_chatdata("dp0-1.json")),
@@ -66,7 +79,7 @@ def test_duplicate_head(mocker):
         Block(first = 52568, last = 62875, chat_data = load_chatdata("dp0-5.json"))
     )
     _dump(blocks)
-    result = duplcheck.duplicate_head(blocks)
+    result = duplcheck.remove_duplicate_head(blocks)

     assert len(result) == 3
     assert result[0].first == blocks[2].first
@@ -76,7 +89,7 @@ def test_duplicate_head(mocker):
     assert result[2].first == blocks[5].first
     assert result[2].last == blocks[5].last

-def test_duplicate_tail(mocker):
+def test_duplicate_tail():
     """
     test duplicate tail data
     operation : append [0]
@@ -88,7 +101,11 @@ def test_duplicate_tail(mocker):

     result : [0] , [2] , [4]
     """
+    def load_chatdata(filename):
+        return parser.parse(
+            json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
+        )[1]
+    # chat data offsets are ignored.
     blocks = (
         Block(first = 0,    last = 2500, chat_data=load_chatdata("dp0-0.json")),
         Block(first = 1500, last = 2500, chat_data=load_chatdata("dp0-1.json")),
@@ -98,7 +115,7 @@ def test_duplicate_tail(mocker):
         Block(first = 52568, last = 62875, chat_data=load_chatdata("dp0-5.json"))
     )

-    result = duplcheck.duplicate_tail(blocks)
+    result = duplcheck.remove_duplicate_tail(blocks)
     _dump(result)
     assert len(result) == 3
     assert result[0].first == blocks[0].first
|||||||
232
tests/test_patch.py
Normal file
232
tests/test_patch.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os, sys
|
||||||
|
import time
|
||||||
|
from aioresponses import aioresponses
|
||||||
|
from pytchat.tool.download import duplcheck
|
||||||
|
from pytchat.tool.download import parser
|
||||||
|
from pytchat.tool.download.block import Block
|
||||||
|
from pytchat.tool.download.patch import Patch, fill, split, set_patch
|
||||||
|
from pytchat.tool.download.duplcheck import _dump
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def load_chatdata(filename):
|
||||||
|
return parser.parse(
|
||||||
|
json.loads(_open_file("tests/testdata/dl_patch/"+filename))
|
||||||
|
)[1]
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_0():
|
||||||
|
"""
|
||||||
|
Normal case
|
||||||
|
|
||||||
|
@parent_block (# = already downloaded)
|
||||||
|
|
||||||
|
first last end
|
||||||
|
|########----------------------------------------|
|
||||||
|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
first = last = 0 end=parent_end
|
||||||
|
---------------------------------------------------|
|
||||||
|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
|
||||||
|
first last end (after split)
|
||||||
|
|########------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
first last end
|
||||||
|
|###########---------------|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert child.continuation == 'patch'
|
||||||
|
assert parent.last < child.first
|
||||||
|
assert parent.end == child.first
|
||||||
|
assert child.first < child.last
|
||||||
|
assert child.last < child.end
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == False
|
||||||
|
|
||||||
|
def test_split_1():
|
||||||
|
"""patch.first <= parent_block.last
|
||||||
|
|
||||||
|
While awaiting at run()->asyncdl._fetch()
|
||||||
|
downloading parent_block proceeds,
|
||||||
|
and parent.block.last exceeds patch.first.
|
||||||
|
|
||||||
|
In this case, fetched patch is all discarded,
|
||||||
|
and dlworker searches other processing block again.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
patch.first
|
||||||
|
first | last end
|
||||||
|
|####################|#####|---------------------|
|
||||||
|
^
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end=parent_end
|
||||||
|
---------------------------------------------------|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end
|
||||||
|
|###########################|--------------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
.............. -> discard all data
|
||||||
|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert parent.last == 33000 #no change
|
||||||
|
assert parent.end == 60000 #no change
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == True #exclude during_split sequence
|
||||||
|
|
||||||
|
def test_split_2():
|
||||||
|
"""child_block.end < patch.last:
|
||||||
|
|
||||||
|
Case the last offset of patch exceeds child_block.end.
|
||||||
|
In this case, remove overlapped data of patch.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
@parent_block (# = already downloaded)
|
||||||
|
first last end (before split)
|
||||||
|
|########------------------------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end=parent_end
|
||||||
|
-----------------------------------------|
|
||||||
|
|
||||||
|
continuation:succeed from patch
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-------- patch --------|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end (after split)
|
||||||
|
|########------------|
|
||||||
|
|
||||||
|
@child_block old patch.end
|
||||||
|
first last=end |
|
||||||
|
|#################|...... cut extra data.
|
||||||
|
^
|
||||||
|
continuation : None (download complete)
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-------- patch --------|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.last < child.first
|
||||||
|
assert parent.end == child.first
|
||||||
|
assert child.first < child.last
|
||||||
|
assert child.last < child.end
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == False
|
||||||
|
|
||||||
|
def test_split_none():
|
||||||
|
"""patch.last <= parent_block.last
|
||||||
|
|
||||||
|
While awaiting at run()->asyncdl._fetch()
|
||||||
|
downloading parent_block proceeds,
|
||||||
|
and parent.block.last exceeds patch.first.
|
||||||
|
|
||||||
|
In this case, fetched patch is all discarded,
|
||||||
|
and dlworker searches other processing block again.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
patch.first
|
||||||
|
first | last end
|
||||||
|
|####################|###################|-------|
|
||||||
|
^
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end=parent_end
|
||||||
|
---------------------------------------------------|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
patch.last < parent_block.last .
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end (before split)
|
||||||
|
|########################################|-------|
|
||||||
|
.
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
............ -> discard all data.
|
||||||
|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert parent.last == 40000 #no change
|
||||||
|
assert parent.end == 60000 #no change
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == True #exclude during_split sequence
|
||||||
New vendored test data (diff contents suppressed; files too large to display):
tests/testdata/dl_duplcheck/overlap/dp0-0.json (6128 lines)
tests/testdata/dl_duplcheck/overlap/dp0-1.json (3078 lines)
tests/testdata/dl_duplcheck/overlap/dp0-2.json (3078 lines)
tests/testdata/dl_duplcheck/overlap/dp0-3.json (3078 lines)
tests/testdata/dl_duplcheck/overlap/dp0-4.json (2529 lines)
tests/testdata/dl_duplcheck/overlap/dp0-5.json (1431 lines)
tests/testdata/dl_patch/pt0-0.json (3078 lines)
tests/testdata/dl_patch/pt0-1.json (3078 lines)
tests/testdata/dl_patch/pt0-3.json (3078 lines)
tests/testdata/dl_patch/pt0-4.json (3078 lines)
tests/testdata/dl_patch/pt0-5.json (3078 lines)