Files
pytchat-fork/pytchat/tool/dlworker.py
2020-02-11 12:43:11 +09:00

153 lines
4.7 KiB
Python

from . import parser
from .. paramgen import arcparam
from . block import Block
class DownloadWorker:
    """
    Worker that downloads chat data for one block at a time.

    Parameter
    ----------
    fetch : func :
        download function of asyncdl. It is awaited as
        ``fetch(continuation, session)`` and must produce the 4-tuple
        ``(chats, new_cont, fetched_first, fetched_last)``.
    block : Block :
        Block object this worker is currently filling
    blocks : list :
        List of Block(s); handed to ``get_new_block`` when the current
        block is exhausted
    video_id : str :
        video id kept for ``get_new_block``, which passes it to
        ``arcparam.getparam``

    Attribute
    ----------
    source_block : Block :
        the Block from which the current downloading block was split.
        Starts as None and is reset to None after the first fetch that
        follows a split.
    """

    __slots__ = ['fetch', 'block', 'blocks', 'video_id', 'source_block']

    def __init__(self, fetch, block, blocks, video_id ):
        self.fetch = fetch
        self.block = block
        self.blocks = blocks
        self.video_id = video_id
        self.source_block = None

    async def run(self, session):
        """
        Download until no continuation is left to follow.

        Parameter
        ----------
        session :
            opaque object forwarded unchanged to ``self.fetch``
        """
        # Remove extra chats just after ready_blocks().
        cont = initial_fill(self.block)

        # download loop
        while cont:
            chats, next_cont, first_offset, last_offset = await self.fetch(
                cont, session)
            if first_offset is None:
                # Nothing came back for this continuation: stop the worker.
                return

            if self.source_block:
                # First fetch of a freshly split block: settle the boundary
                # between the source block and the current one.
                cont = split_fill(
                    self.source_block, self.block, chats, next_cont,
                    first_offset, last_offset)
                self.source_block = None
            else:
                cont = fill(self.block, chats, next_cont, last_offset)

            if cont is not None:
                continue

            # Current block has nothing more to fetch; move on to a new one.
            # An empty continuation on the new block ends the loop.
            self.block = get_new_block(self)
            cont = self.block.continuation
def get_new_block(worker) -> Block:
    """
    Mark the worker's current block as done and produce its next block.

    The next block is carved out of the unfinished block chosen by
    ``search_undone_block``: it starts from a continuation generated for
    the midpoint between that block's ``last`` and ``end``, and is inserted
    into ``worker.blocks`` right after it. The chosen block is stored in
    ``worker.source_block`` and flagged ``splitting``.

    Parameter
    ----------
    worker : DownloadWorker :
        worker whose ``block``, ``blocks``, ``video_id`` and
        ``source_block`` are read and/or updated

    Returns
    --------
    Block :
        the newly inserted block, or a Block whose continuation is None
        when no block qualifies for splitting.
    """
    worker.block.done = True

    position, parent = search_undone_block(worker.blocks)
    if parent is None:
        return Block(continuation = None)

    midpoint = (parent.end + parent.last) / 2
    # NOTE(review): the /1000 presumably converts milliseconds to seconds
    # for arcparam - confirm against arcparam.getparam.
    seek_cont = arcparam.getparam(worker.video_id, seektime = midpoint / 1000)

    parent.splitting = True
    worker.source_block = parent

    child = Block(
        end = parent.end,
        chat_data = [],
        continuation = seek_cont,
        splitting = True,
        is_last = parent.is_last)
    worker.blocks.insert(position + 1, child)
    return child
def search_undone_block(blocks, min_remaining=120000) -> "tuple[int, Block | None]":
    """
    Pick the unfinished block with the largest ``remaining`` value.

    Blocks flagged ``done`` or ``splitting`` are ignored. A block only
    qualifies when its ``remaining`` is strictly greater than
    ``min_remaining``; among equal candidates the earliest one wins.

    Parameter
    ----------
    blocks : list :
        List of Block(s) to inspect.
    min_remaining : int :
        lower bound (exclusive) on ``remaining`` for a block to qualify.
        Defaults to 120000, the value that used to be hard-coded here.
        Values below 0 are treated as 0.

    Returns
    --------
    ret_index : int :
        index of Block download not completed in blocks
        (0 when no block qualifies).
    ret_block : Block :
        Block download not completed, or None when no block qualifies.
    """
    ret_index = 0
    ret_block = None
    # A single running threshold replaces the pair of comparisons against
    # the best value so far and the fixed minimum.
    threshold = max(0, min_remaining)
    for index, block in enumerate(blocks):
        if block.done or block.splitting:
            continue
        remaining = block.remaining
        if remaining > threshold:
            ret_index, ret_block, threshold = index, block, remaining
    return ret_index, ret_block
def top_cut(chats, last) -> list:
    """
    Drop the leading chats whose offset is not greater than ``last``.

    Parameter
    ----------
    chats : list :
        chat items understood by ``parser.get_offset``
    last :
        offset threshold (exclusive)

    Returns
    --------
    list :
        ``chats`` from the first item whose offset exceeds ``last``
        onwards, or an empty list when there is no such item.
    """
    start = next(
        (pos for pos, chat in enumerate(chats)
         if parser.get_offset(chat) > last),
        None)
    if start is None:
        return []
    return chats[start:]
def bottom_cut(chats, last) -> list:
    """
    Strip, in place, the trailing chats whose offset is ``last`` or later.

    Parameter
    ----------
    chats : list :
        chat items understood by ``parser.get_offset``; this list itself
        is shortened
    last :
        offset threshold (inclusive)

    Returns
    --------
    list :
        the same ``chats`` object after trimming.
    """
    # Only the tail is examined; trimming stops at the first chat (from the
    # end) that lies before the threshold.
    while chats and parser.get_offset(chats[-1]) >= last:
        chats.pop()
    return chats
def split_fill(source_block, block, chats, new_cont,
               fetched_first, fetched_last):
    """
    Store the first fetch of a split block and fix the split boundary.

    Parameter
    ----------
    source_block : Block :
        block that ``block`` was split from; its ``end`` is moved to
        ``fetched_first`` and its ``splitting`` flag is cleared
    block : Block :
        block receiving the fetched chats
    chats : list :
        fetched chat items
    new_cont :
        continuation returned together with ``chats``
    fetched_first :
        offset of the first fetched chat
    fetched_last :
        offset of the last fetched chat

    Returns
    --------
    The continuation to follow next. None when the fetch does not reach
    past ``source_block.last`` (nothing is modified in that case), or when
    the fetch ran beyond ``block.end``.
    """
    if fetched_last <= source_block.last:
        return None

    source_block.splitting = False
    block.splitting = False
    source_block.end = fetched_first

    # Fetched range starts inside what the source block already holds:
    # discard that overlap from the head.
    overlaps_source = fetched_first < source_block.last
    if overlaps_source:
        chats = top_cut(chats, source_block.last)
    block.first = source_block.last if overlaps_source else fetched_first

    # Fetched range runs past the end of this block: discard the excess
    # from the tail and stop following continuations.
    overruns_end = block.end < fetched_last
    if overruns_end:
        chats = bottom_cut(chats, block.end)
    block.last = block.end if overruns_end else fetched_last
    next_cont = None if overruns_end else new_cont

    block.chat_data.extend(chats)
    block.continuation = next_cont
    return next_cont
def initial_fill(block):
    """
    Run the block's existing chat data through ``get_chats`` once.

    The block's own ``chat_data``, ``continuation`` and ``last`` are used
    as the input; ``chat_data`` is replaced by the chats that
    ``get_chats`` returns.

    Returns
    --------
    The continuation returned by ``get_chats``.
    """
    kept, next_cont = get_chats(
        block, block.chat_data, block.continuation, block.last)
    block.chat_data = kept
    return next_cont
def fill(block, chats, cont, fetched_last):
    """
    Append freshly fetched chats to the block.

    The chats are first passed through ``get_chats``; whatever it returns
    is added to the end of ``block.chat_data``.

    Returns
    --------
    The continuation returned by ``get_chats``.
    """
    accepted, next_cont = get_chats(block, chats, cont, fetched_last)
    block.chat_data.extend(accepted)
    return next_cont
def get_chats(block, chats, cont, fetched_last):
    """
    Update the block's progress and select the chats that belong to it.

    Parameter
    ----------
    block : Block :
        block being filled; ``last``, ``remaining`` and ``done`` are updated
    chats : list :
        fetched chat items understood by ``parser.get_offset``
    cont :
        continuation that came with ``chats``
    fetched_last :
        offset of the last fetched chat

    Returns
    --------
    chats : list :
        chats to store in the block.
    cont :
        continuation to follow next, or None once the block's end has
        been reached.
    """
    block.last = fetched_last
    if fetched_last < block.end or block.is_last:
        # Still inside the block (or the block has no following block):
        # keep everything and continue from ``cont``.
        block.remaining = block.end - block.last
        return chats, cont

    # The fetch reached the end of the block: keep only the chats that come
    # before the first one at or past ``block.end``.
    for i, line in enumerate(chats):
        line_offset = parser.get_offset(line)
        if line_offset >= block.end:
            block.last = line_offset
            block.remaining = 0
            block.done = True
            return chats[:i], None

    # No chat lies at or past ``block.end`` (e.g. ``chats`` is empty).
    # Falling off the end here used to return a bare None, which made the
    # callers' tuple unpacking fail with TypeError. The block's end has
    # been reached, so finish it and keep every chat.
    block.remaining = 0
    block.done = True
    return chats, None