Improve dlworker efficiency
This commit is contained in:
@@ -1,27 +1,144 @@
|
||||
from . import parser
|
||||
|
||||
from .. paramgen import arcparam
|
||||
from . block import Block
|
||||
class DownloadWorker:
    """
    DownloadWorker associates a download session with a block.

    Once the current block yields no further continuation, the worker asks
    `get_new_block` for a new block and keeps downloading until no
    continuation is left.

    Parameter
    ----------
    fetch : func :
        download function of asyncdl. It is awaited as
        `fetch(continuation, session)` and must return the 4-tuple
        `(chats, new_cont, fetched_first, fetched_last)`.

    block : Block :
        Block object that includes chat_data

    blocks : list :
        List of Block(s)

    video_id : str :

    Attribute
    ----------
    source_block : Block :
        the block from which the currently downloading block was split
        (None when the current block is not the result of a split)
    """

    __slots__ = ['fetch', 'block', 'blocks', 'video_id', 'source_block']

    def __init__(self, fetch, block, blocks, video_id):
        self.block = block
        self.fetch = fetch
        self.blocks = blocks
        self.video_id = video_id
        self.source_block = None

    async def run(self, session):
        """
        Download chat data into the current block, switching to newly
        split blocks until there is nothing left to fetch.

        Parameter
        ----------
        session :
            passed through unchanged as the second argument of `fetch`
        """
        # Remove extra chats just after ready_blocks().
        continuation = initial_fill(self.block)

        # download loop
        while continuation:
            chats, new_cont, fetched_first, fetched_last = await self.fetch(
                continuation, session)
            if fetched_first is None:
                # fetch reported no first offset: stop this worker.
                break
            if self.source_block:
                # First fetch of a freshly split block: settle the boundary
                # between the source block and the new block.
                continuation = after_dividing_process(
                    self.source_block, self.block, chats, new_cont,
                    fetched_first, fetched_last)
                self.source_block = None
            else:
                continuation = fill(self.block, chats, new_cont, fetched_last)

            if continuation is None:
                # Current block has nothing more to fetch: take over part of
                # another block. A block whose continuation is None ends the
                # loop.
                new_block = get_new_block(self)
                self.block = new_block
                continuation = new_block.continuation
|
||||
|
||||
def get_new_block(worker) -> Block:
    """
    Mark the worker's current block as done and hand out a new block
    carved from the block chosen by `get_undone_block`.

    Parameter
    ----------
    worker :
        object exposing `block`, `blocks`, `video_id` and `source_block`

    Returns
    ----------
    Block :
        a new block covering the latter part of the chosen block, already
        inserted into `worker.blocks` right after it; or a Block whose
        continuation is None when no block qualifies for splitting.
    """
    worker.block.done = True
    position, target = get_undone_block(worker.blocks)
    if target is None:
        return Block(continuation=None)

    # Seek to the midpoint between what the target block has already
    # downloaded and its end.
    # NOTE(review): the /1000 presumably converts milliseconds to
    # seconds for `seektime` -- confirm against arcparam.getparam.
    midpoint = (target.end + target.last) / 2
    seek_continuation = arcparam.getparam(
        worker.video_id, seektime=midpoint / 1000)

    worker.source_block = target
    worker.source_block.splitting = True

    child = Block(
        end=target.end,
        chat_data=[],
        continuation=seek_continuation,
        splitting=True,
        is_last=worker.source_block.is_last)
    worker.blocks.insert(position + 1, child)
    return child
|
||||
|
||||
def get_undone_block(blocks, min_remaining=120000) -> "tuple[int, Block | None]":
    """
    Pick the block with the largest `remaining` value that is worth
    splitting.

    Blocks flagged `done` or `splitting` are skipped. On ties the earliest
    block wins.

    Parameter
    ----------
    blocks : list :
        List of Block(s)

    min_remaining : int :
        a block qualifies only when its `remaining` is strictly greater
        than this value (and greater than 0). Defaults to 120000.

    Returns
    ----------
    (index, block) :
        position of the chosen block in `blocks` and the block itself,
        or `(0, None)` when no block qualifies.
    """
    best_remaining = 0
    best_block = None
    best_index = 0
    for index, block in enumerate(blocks):
        if block.done or block.splitting:
            continue
        remaining = block.remaining
        if remaining > best_remaining and remaining > min_remaining:
            best_index = index
            best_block = block
            best_remaining = remaining
    return best_index, best_block
|
||||
|
||||
def top_cut(chats, last) -> list:
    """
    Drop the leading chats whose offset is not greater than `last`.

    Returns the slice of `chats` starting at the first chat whose
    `parser.get_offset` value is greater than `last`, or an empty list
    when no chat qualifies.
    """
    start = next(
        (position for position, chat in enumerate(chats)
         if parser.get_offset(chat) > last),
        None)
    if start is None:
        return []
    return chats[start:]
|
||||
|
||||
def bottom_cut(chats, last) -> list:
    """
    Remove, in place, the trailing chats whose offset is at or beyond
    `last`.

    Scanning stops at the first chat (from the end) whose
    `parser.get_offset` value is smaller than `last`; earlier chats are
    never inspected.

    Returns
    ----------
    list :
        the same `chats` list object, truncated.
    """
    # Find the cut position first, then truncate once, instead of popping
    # from the list while it is being iterated.
    keep = len(chats)
    while keep > 0 and parser.get_offset(chats[keep - 1]) >= last:
        keep -= 1
    del chats[keep:]
    return chats
|
||||
|
||||
|
||||
def after_dividing_process(source_block, block, chats, new_cont,
                           fetched_first, fetched_last):
    """
    Settle the boundary between `source_block` and the `block` split from
    it, using the result of the first fetch for `block`.

    Parameter
    ----------
    source_block : Block :
        the block that was split
    block : Block :
        the newly created block
    chats : list :
        chats returned by the fetch
    new_cont :
        continuation returned by the fetch
    fetched_first, fetched_last :
        first and last offsets returned by the fetch

    Returns
    ----------
    the continuation to fetch next for `block`, or None when the fetched
    data does not go past `source_block.last` or already reaches
    `block.end`.
    """
    if fetched_last <= source_block.last:
        # NOTE(review): on this early return neither block has its
        # `splitting` flag cleared -- confirm that this is intended.
        return None

    block.splitting = source_block.splitting = False
    source_block.end = block.first = fetched_first
    block.last = fetched_last
    next_cont = new_cont

    source_last = source_block.last
    if fetched_first < source_last:
        # Fetched data overlaps what the source block already holds:
        # drop the overlapping head.
        chats = top_cut(chats, source_last)
        block.first = source_last

    block_end = block.end
    if block_end < fetched_last:
        # Fetched data runs past the end of this block: drop the tail
        # and stop fetching for this block.
        chats = bottom_cut(chats, block_end)
        block.last = block_end
        next_cont = None

    block.chat_data.extend(chats)
    block.continuation = next_cont
    return next_cont
|
||||
|
||||
def initial_fill(block):
    """
    Trim the chat data a block already holds against the block's end.

    Runs `_cut` on the block's own `chat_data`, `continuation` and `last`,
    stores the resulting chats back on the block and returns the
    resulting continuation.
    """
    block.chat_data, next_cont = _cut(
        block, block.chat_data, block.continuation, block.last)
    return next_cont
|
||||
|
||||
def fill(block, chats, cont, fetched_last):
    """
    Append freshly fetched chats to a block.

    The chats are first passed through `_cut`; the chats it returns are
    appended to `block.chat_data` and the continuation it returns is
    handed back to the caller.
    """
    kept_chats, next_cont = _cut(block, chats, cont, fetched_last)
    block.chat_data.extend(kept_chats)
    return next_cont
|
||||
|
||||
def _cut(block, chats, cont, fetched_last):
|
||||
block.last = fetched_last
|
||||
if fetched_last < block.end or block.is_last:
|
||||
block.last = fetched_last
|
||||
block.remaining=block.end-block.last
|
||||
return chats, cont
|
||||
for i, line in enumerate(chats):
|
||||
line_offset = parser.get_offset(line)
|
||||
if line_offset >= block.end:
|
||||
block.last = line_offset
|
||||
block.remaining=0
|
||||
block.done=True
|
||||
return chats[:i], None
|
||||
Reference in New Issue
Block a user