Aggregate return values with patch class

taizan-hokuto
2020-02-16 20:43:12 +09:00
parent 6fdb3bf8cf
commit c4cf424702
23 changed files with 35254 additions and 327 deletions


@@ -1,153 +0,0 @@
from . import parser
from .. paramgen import arcparam
from . block import Block
class DownloadWorker:
"""
DownloadWorker associates a download session with a block.
Parameter
----------
fetch : func :
download function of asyncdl
block : Block :
Block object associated with this worker
blocks : list :
List of Block(s)
video_id : str :
source_block : Block :
        the Block from which the current downloading block was split
"""
__slots__ = ['fetch', 'block', 'blocks', 'video_id', 'source_block']
def __init__(self, fetch, block, blocks, video_id ):
self.block = block
self.fetch = fetch
self.blocks = blocks
self.video_id = video_id
self.source_block = None
async def run(self, session):
"""Remove extra chats just after ready_blocks(). """
continuation = initial_fill(self.block)
"""download loop """
while continuation:
chats, new_cont, fetched_first, fetched_last = await self.fetch(
continuation, session)
if fetched_first is None:
break
if self.source_block:
continuation = split_fill(
self.source_block, self.block, chats, new_cont,
fetched_first, fetched_last)
self.source_block = None
else:
continuation = fill(self.block, chats, new_cont, fetched_last)
if continuation is None:
new_block = get_new_block(self)
self.block = new_block
continuation = new_block.continuation
def get_new_block(worker) -> Block:
worker.block.done = True
index,undone_block = search_undone_block(worker.blocks)
if undone_block is None:
return Block(continuation = None)
mean = (undone_block.end + undone_block.last)/2
continuation = arcparam.getparam(worker.video_id, seektime = mean/1000)
worker.source_block = undone_block
worker.source_block.splitting = True
new_block = Block(
end = undone_block.end,
chat_data = [],
continuation = continuation,
splitting = True,
is_last = worker.source_block.is_last)
worker.blocks.insert(index+1,new_block)
return new_block
def search_undone_block(blocks) -> (int, Block):
"""
Returns
--------
    ret_index : int :
        index in blocks of the Block whose download is not complete.
    ret_block : Block :
        the Block whose download is not complete.
"""
max_remaining = 0
ret_block = None
ret_index = 0
for index, block in enumerate(blocks):
if block.done or block.splitting:
continue
remaining = block.remaining
if remaining > max_remaining and remaining > 120000:
ret_index = index
ret_block = block
max_remaining = remaining
return ret_index, ret_block
def top_cut(chats, last) -> list:
for i, chat in enumerate(chats):
if parser.get_offset(chat) > last:
return chats[i:]
return []
def bottom_cut(chats, last) -> list:
for rchat in reversed(chats):
if parser.get_offset(rchat)>=last:
chats.pop()
else:
break
return chats
def split_fill(source_block, block, chats, new_cont,
fetched_first, fetched_last):
if fetched_last <= source_block.last:
return None
block.splitting = False
source_block.splitting = False
source_block.end = fetched_first
block.first = fetched_first
block.last = fetched_last
continuation = new_cont
if fetched_first < source_block.last:
chats = top_cut(chats, source_block.last)
block.first = source_block.last
if block.end < fetched_last:
chats = bottom_cut(chats, block.end)
block.last = block.end
continuation = None
block.chat_data.extend(chats)
block.continuation = continuation
return continuation
def initial_fill(block):
chats, cont = get_chats(block, block.chat_data, block.continuation, block.last)
block.chat_data = chats
return cont
def fill(block, chats, cont, fetched_last):
chats, cont = get_chats(block, chats, cont, fetched_last)
block.chat_data.extend(chats)
return cont
def get_chats(block, chats, cont, fetched_last):
block.last = fetched_last
if fetched_last < block.end or block.is_last:
block.last = fetched_last
block.remaining=block.end-block.last
return chats, cont
for i, line in enumerate(chats):
line_offset = parser.get_offset(line)
if line_offset >= block.end:
block.last = line_offset
block.remaining = 0
block.done = True
return chats[:i], None


@@ -5,16 +5,17 @@ import json
from . import parser
from . block import Block
from . dlworker import DownloadWorker
-from .. paramgen import arcparam
-from .. import config
-from urllib.parse import quote
+from . patch import Patch
+from ... import config
+from ... paramgen import arcparam
from concurrent.futures import CancelledError
+from urllib.parse import quote
headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
-def _split(start, end, count, min_interval = 120):
+def _split(start, end, count, min_interval_sec = 120):
"""
Split section from `start` to `end` into `count` pieces,
and returns the beginning of each piece.
@@ -23,7 +24,7 @@ def _split(start, end, count, min_interval = 120):
Returns:
--------
-    List of the beginning position of each piece.
+    List of the offset of each block's first chat data.
"""
if not (isinstance(start,int) or isinstance(start,float)) or \
@@ -35,14 +36,14 @@ def _split(start, end, count, min_interval = 120):
raise ValueError("end must be equal to or greater than start.")
if count<1:
raise ValueError("count must be equal to or greater than 1.")
-    if (end-start)/count < min_interval:
-        count = int((end-start)/min_interval)
+    if (end-start)/count < min_interval_sec:
+        count = int((end-start)/min_interval_sec)
if count == 0 : count = 1
interval= (end-start)/count
if count == 1:
return [start]
-    return sorted(list(set([int(start+interval*j)
+    return sorted( list(set( [int(start + interval*j)
for j in range(count) ])))
def ready_blocks(video_id, duration, div, callback):
@@ -50,27 +51,16 @@ def ready_blocks(video_id, duration, div, callback):
async def _get_blocks( video_id, duration, div, callback):
async with aiohttp.ClientSession() as session:
-            tasks = [_create_block(session, video_id, pos, seektime, callback)
-                     for pos, seektime in enumerate(_split(-1, duration, div))]
+            tasks = [_create_block(session, video_id, seektime, callback)
+                     for seektime in _split(-1, duration, div)]
return await asyncio.gather(*tasks)
-    async def _create_block(session, video_id, pos, seektime, callback):
-        continuation = arcparam.getparam(
-            video_id, seektime = seektime)
+    async def _create_block(session, video_id, seektime, callback):
+        continuation = arcparam.getparam(video_id, seektime = seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
-        for _ in range(3):
-            try:
-                async with session.get(url, headers = headers) as resp:
-                    text = await resp.text()
-                next_continuation, actions = parser.parse(json.loads(text))
-            except json.JSONDecodeError:
-                print("JSONDecodeError occured")
-                await asyncio.sleep(1)
-                continue
-            break
-        else:
-            raise json.JSONDecodeError
+        async with session.get(url, headers = headers) as resp:
+            text = await resp.text()
+        next_continuation, actions = parser.parse(json.loads(text))
if actions:
first = parser.get_offset(actions[0])
last = parser.get_offset(actions[-1])
@@ -82,59 +72,50 @@ def ready_blocks(video_id, duration, div, callback):
first = first,
last = last
)
"""
fetch initial blocks.
"""
loop = asyncio.get_event_loop()
-    result = loop.run_until_complete(
+    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
-    return result
+    return blocks
-def download_chunk(callback, blocks, video_id):
+def download_patch(callback, blocks, video_id):
async def _allocate_workers():
workers = [
DownloadWorker(
-                fetch = _fetch,
-                block = block,
-                blocks = blocks,
-                video_id = video_id
+                fetch = _fetch, block = block,
+                blocks = blocks, video_id = video_id
            )
-            for i,block in enumerate(blocks)
+            for block in blocks
]
async with aiohttp.ClientSession() as session:
tasks = [worker.run(session) for worker in workers]
return await asyncio.gather(*tasks)
-    async def _fetch(continuation,session):
+    async def _fetch(continuation,session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
-        for _ in range(3):
-            try:
-                async with session.get(url,headers = config.headers) as resp:
-                    chat_json = await resp.text()
-            except json.JSONDecodeError:
-                print("JSONDecodeError occured")
-                await asyncio.sleep(1)
-                continue
-            break
-        else:
-            raise json.JSONDecodeError
+        async with session.get(url,headers = config.headers) as resp:
+            chat_json = await resp.text()
        continuation, actions = parser.parse(json.loads(chat_json))
        if actions:
            last = parser.get_offset(actions[-1])
            first = parser.get_offset(actions[0])
            if callback:
                callback(actions, last - first)
-            return actions, continuation, first, last
-        return [], continuation, None, None
+            return Patch(actions, continuation, first, last)
+        return Patch()
"""
allocate workers and assign blocks.
"""
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(_allocate_workers())
except CancelledError:
pass
-async def shutdown():
+async def _shutdown():
print("\nshutdown...")
tasks = [t for t in asyncio.all_tasks()
if t is not asyncio.current_task()]
@@ -147,5 +128,5 @@ async def shutdown():
def cancel():
loop = asyncio.get_event_loop()
-    loop.create_task(shutdown())
+    loop.create_task(_shutdown())


@@ -1,12 +1,10 @@
from . import parser
class Block:
"""Block object represents virtual chunk of chatdata.
"""Block object represents something like a box
to join chunk of chatdata.
Parameter:
---------
pos : int :
index of this block on block list.
first : int :
videoOffsetTimeMs of the first chat_data
(chat_data[0])
@@ -37,23 +35,23 @@ class Block:
is_last : bool :
whether this block is the last one in blocklist.
-    splitting : bool :
-        whether this block is in the process of splitting.
+    during_split : bool :
+        whether this block is in the process of being split.
+        While True, this block is excluded from the duplicate-split procedure.
"""
__slots__ = ['first','last','end','continuation','chat_data','remaining',
-                 'done','is_last','splitting']
+                 'done','is_last','during_split']
-    def __init__(self, first = 0, last = 0, end = 0,
+    def __init__(self, first = 0, last = 0, end = 0,
                 continuation = '', chat_data = [], is_last = False,
-                 splitting = False):
+                 during_split = False):
self.first = first
self.last = last
self.end = end
self.continuation = continuation
self.chat_data = chat_data
self.done = False
-        self.remaining = self.end- self.last
+        self.remaining = self.end - self.last
        self.is_last = is_last
-        self.splitting = splitting
+        self.during_split = during_split
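
For orientation, the renamed fields behave as before: remaining is derived in __init__ from end and last, and during_split simply replaces splitting. A minimal sketch, using the import path from this commit's tests:

    from pytchat.tool.download.block import Block

    # A block that should cover offsets up to 60000 ms but whose
    # download has only reached 15000 ms so far:
    b = Block(first=0, last=15000, end=60000, continuation='token')
    assert b.remaining == 45000       # end - last
    assert b.during_split is False    # not currently being split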


@@ -0,0 +1,87 @@
from . import parser
from . block import Block
from . patch import Patch, fill, split
from ... paramgen import arcparam
class DownloadWorker:
"""
DownloadWorker associates a download session with a block.
    When the dlworker finishes downloading, the block
    being downloaded is split and assigned to the free dlworker.
Parameter
----------
fetch : func :
download function of asyncdl
block : Block :
Block object that includes chat_data
blocks : list :
List of Block(s)
video_id : str :
parent_block : Block :
        the block from which the current block was split
"""
__slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']
def __init__(self, fetch, block, blocks, video_id ):
self.block = block
self.fetch = fetch
self.blocks = blocks
self.video_id = video_id
self.parent_block = None
async def run(self, session):
while self.block.continuation:
patch = await self.fetch(
self.block.continuation, session)
if patch.continuation is None:
"""TODO : make the dlworker assigned to the last block
to work more than twice as possible.
"""
break
if self.parent_block:
split(self.parent_block, self.block, patch)
self.parent_block = None
else:
fill(self.block, patch)
if self.block.continuation is None:
"""finished downloading this block """
self.block.done = True
self.block = _search_new_block(self)
def _search_new_block(worker) -> Block:
index, undone_block = _get_undone_block(worker.blocks)
if undone_block is None:
return Block(continuation = None)
mean = (undone_block.last + undone_block.end)/2
continuation = arcparam.getparam(worker.video_id, seektime = mean/1000)
worker.parent_block = undone_block
worker.parent_block.during_split = True
new_block = Block(
end = undone_block.end,
chat_data = [],
continuation = continuation,
during_split = True,
is_last = worker.parent_block.is_last)
worker.blocks.insert(index+1, new_block)
return new_block
def _get_undone_block(blocks) -> (int, Block):
min_interval_ms = 120000
max_remaining = 0
undone_block = None
index_undone_block = 0
for index, block in enumerate(blocks):
if block.done or block.during_split:
continue
remaining = block.remaining
if remaining > max_remaining and remaining > min_interval_ms:
index_undone_block = index
undone_block = block
max_remaining = remaining
return index_undone_block, undone_block
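
_get_undone_block scans the block list and keeps the unfinished block with the largest remaining range, skipping anything done or mid-split and anything with less than min_interval_ms (120000 ms, i.e. 120 s) left. A small worked example, assuming this module is importable as pytchat.tool.download.dlworker, as the surrounding imports suggest:

    from pytchat.tool.download.block import Block
    from pytchat.tool.download.dlworker import _get_undone_block

    blocks = [
        Block(first=0,      last=60000,  end=60000),   # remaining 0
        Block(first=60000,  last=90000,  end=300000),  # remaining 210000
        Block(first=300000, last=310000, end=360000),  # remaining 50000 < 120000
    ]
    index, block = _get_undone_block(blocks)
    assert index == 1 and block is blocks[1]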


@@ -1,9 +1,9 @@
from . import asyncdl
from . import duplcheck
from . import parser
-from . duplcheck import duplicate_head, duplicate_tail, overwrap
-from . videoinfo import VideoInfo
-from .. import config
-from .. exceptions import InvalidVideoIdException
+from .. videoinfo import VideoInfo
+from ... import config
+from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers=config.headers
@@ -22,36 +22,36 @@ class Downloader:
self.callback = callback
self.blocks = []
-    def ready_blocks(self):
-        result = asyncdl.ready_blocks(
+    def _ready_blocks(self):
+        blocks = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
-        self.blocks = [block for block in result if block]
+        self.blocks = [block for block in blocks if block]
return self
-    def remove_duplicate_head(self):
-        self.blocks = duplicate_head(self.blocks)
+    def _remove_duplicate_head(self):
+        self.blocks = duplcheck.remove_duplicate_head(self.blocks)
return self
-    def set_temporary_last(self):
+    def _set_block_end(self):
for i in range(len(self.blocks)-1):
self.blocks[i].end = self.blocks[i+1].first
self.blocks[-1].end = self.duration*1000
self.blocks[-1].is_last =True
return self
-    def remove_overwrap(self):
-        self.blocks = overwrap(self.blocks)
+    def _remove_overlap(self):
+        self.blocks = duplcheck.remove_overlap(self.blocks)
return self
-    def download_blocks(self):
-        asyncdl.download_chunk(self.callback, self.blocks, self.video_id)
+    def _download_blocks(self):
+        asyncdl.download_patch(self.callback, self.blocks, self.video_id)
return self
-    def remove_duplicate_tail(self):
-        self.blocks = duplicate_tail(self.blocks)
+    def _remove_duplicate_tail(self):
+        self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
return self
-    def combine(self):
+    def _combine(self):
ret = []
for block in self.blocks:
ret.extend(block.chat_data)
@@ -59,13 +59,13 @@ class Downloader:
def download(self):
return (
-            self.ready_blocks()
-                .remove_duplicate_head()
-                .remove_overwrap()
-                .set_temporary_last()
-                .download_blocks()
-                .remove_duplicate_tail()
-                .combine()
+            self._ready_blocks()
+                ._remove_duplicate_head()
+                ._set_block_end()
+                ._remove_overlap()
+                ._download_blocks()
+                ._remove_duplicate_tail()
+                ._combine()
)
def download(video_id, div = 1, callback = None, processor = None):
@@ -86,4 +86,4 @@ def download(video_id, div = 1, callback = None, processor = None):
)
def cancel():
-    asyncdl.cancel()
+    asyncdl.cancel()
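
The chain in download() is a fixed pipeline: prepare blocks, prune duplicate heads, set each block's end, drop overlap, download, prune duplicate tails, and concatenate. A usage sketch of the module-level entry point, assuming this module is importable as pytchat.tool.download.downloader:

    from pytchat.tool.download import downloader

    # "VIDEO_ID" is a placeholder. div=8 splits the replay across
    # 8 parallel workers; the exact return value depends on the
    # elided body of download() (e.g. whether a processor is given).
    chat_data = downloader.download("VIDEO_ID", div=8)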


@@ -20,18 +20,18 @@ def check_duplicate(chatdata):
and
tbl_type[i] == tbl_type[j]
)
print("creating table...")
create_table(chatdata,max_range)
print("searching duplicate data...")
return [{ "i":{
"index" : i, "id" : parser.get_id(chatdata[i]),
"offsetTime" : parser.get_offset(chatdata[i])
"offsetTime" : parser.get_offset(chatdata[i]),
"type" : parser.get_type(chatdata[i])
},
"j":{
"index" : j, "id" : parser.get_id(chatdata[j]),
"offsetTime" : parser.get_offset(chatdata[j])
"offsetTime" : parser.get_offset(chatdata[j]),
"type" : parser.get_type(chatdata[j])
}
}
for i in range(max_range) for j in range(i+1,max_range)
@@ -59,18 +59,17 @@ def check_duplicate_offset(chatdata):
print("creating table...")
create_table(chatdata,max_range)
print("searching duplicate offset data...")
print("searching duplicate data...")
return [{
"index" : i, "id" : tbl_id[i],
"offsetTime" : tbl_offset[i],
"type:" : tbl_type[i]
}
for i in range(max_range-1)
if is_duplicate(i,i+1)]
-def duplicate_head(blocks):
+def remove_duplicate_head(blocks):
if len(blocks) == 1 : return blocks
def is_duplicate_head(index):
@@ -97,16 +96,14 @@ def duplicate_head(blocks):
ret.append(blocks[-1])
return ret
-def duplicate_tail(blocks):
+def remove_duplicate_tail(blocks):
if len(blocks) == 1 : return blocks
def is_duplicate_tail(index):
if len(blocks[index].chat_data) == 0:
return True
elif len(blocks[index-1].chat_data) == 0:
return False
id_0 = parser.get_id(blocks[index-1].chat_data[-1])
id_1 = parser.get_id(blocks[index].chat_data[-1])
type_0 = parser.get_type(blocks[index-1].chat_data[-1])
@@ -123,32 +120,34 @@ def duplicate_tail(blocks):
if i == 0 or not is_duplicate_tail(i) ]
return ret
-def overwrap(blocks):
+def remove_overlap(blocks):
    """
-    Fix overlapped blocks after ready_blocks().
+    Align the last offset of each block to the first offset
+    of next block (equals `end` offset of each block).
    """
    if len(blocks) == 1 : return blocks
-    ret = []
-    a = 0
-    b = 1
-    jmp = False
-    ret.append(blocks[0])
-    while a < len(blocks)-2:
-        while blocks[a].last > blocks[b].first:
-            b+=1
-            if b == len(blocks)-1:
-                jmp = True
-                break
-        if jmp: break
-        if b-a == 1:
-            a = b
-        else:
-            a = b-1
-        ret.append(blocks[a])
-        b = a+1
-    ret.append(blocks[-1])
-    return ret
+    for block in blocks:
+        if block.is_last:
+            break
+        if len(block.chat_data)==0:
+            continue
+        block_end = block.end
+        if block.last >= block_end:
+            for line in reversed(block.chat_data):
+                if parser.get_offset(line) < block_end:
+                    break
+                block.chat_data.pop()
+            block.last = parser.get_offset(line)
+            block.remaining=0
+            block.done=True
+            block.continuation = None
+    return blocks
def _dump(blocks):
print(__name__)
print(f"---------- first last end {'':>3}---")
print(f"---------- first last end---")
for i,block in enumerate(blocks):
print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")


@@ -1,6 +1,6 @@
import json
-from .. import config
-from .. exceptions import (
+from ... import config
+from ... exceptions import (
ResponseContextError,
NoContentsException,
NoContinuationsException )
@@ -23,15 +23,15 @@ def parse(jsn):
if jsn is None:
raise ValueError("parameter JSON is None")
if jsn['response']['responseContext'].get('errors'):
-        raise ResponseContextError('動画に接続できません。'
-            '動画IDが間違っているか、動画が削除/非公開の可能性があります。')
+        raise ResponseContextError(
+            'video_id is invalid or private/deleted.')
contents=jsn['response'].get('continuationContents')
if contents is None:
-        raise NoContentsException('チャットデータを取得できませんでした。')
+        raise NoContentsException('No chat data.')
cont = contents['liveChatContinuation']['continuations'][0]
if cont is None:
-        raise NoContinuationsException('Continuationがありません。')
+        raise NoContinuationsException('No Continuation')
metadata = cont.get('liveChatReplayContinuationData')
if metadata:
continuation = metadata.get("continuation")


@@ -0,0 +1,54 @@
from . import parser
from . block import Block
from typing import NamedTuple
class Patch(NamedTuple):
"""
    Patch represents a chunk of chat data
    fetched by asyncdl.download_patch._fetch().
"""
chats : list = []
continuation : str = None
first : int = None
last : int = None
def fill(block:Block, patch:Patch):
block_end = block.end
if patch.last < block_end or block.is_last:
set_patch(block, patch)
return
for line in reversed(patch.chats):
line_offset = parser.get_offset(line)
if line_offset < block_end:
break
patch.chats.pop()
set_patch(block, patch._replace(
continuation = None,
last = line_offset
)
)
block.remaining=0
block.done=True
def split(parent_block:Block, child_block:Block, patch:Patch):
parent_block.during_split = False
"""patch overlaps with parent_block"""
if patch.first <= parent_block.last:
child_block.continuation = None
''' Leave child_block.during_split == True
to exclude from during_split sequence.'''
return
child_block.during_split = False
child_block.first=patch.first
parent_block.end =patch.first
fill(child_block, patch)
def set_patch(block:Block, patch:Patch):
block.continuation = patch.continuation
block.chat_data.extend(patch.chats)
block.last = patch.last
block.remaining = block.end-block.last
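
This is the aggregation named in the commit title: _fetch now returns a single Patch instead of the old (chats, continuation, first, last) tuple, so callers read named fields and derive adjusted copies with NamedTuple._replace. A minimal sketch, using the import path from this commit's tests:

    from pytchat.tool.download.patch import Patch

    # Before this commit, _fetch returned a bare 4-tuple:
    #     chats, cont, first, last = await self.fetch(continuation, session)
    # Now the same data travels as one named object:
    patch = Patch(chats=[], continuation='token', first=32500, last=34000)
    assert patch.last - patch.first == 1500

    # _replace builds an adjusted copy, as fill() does above when it
    # cuts data past block.end and terminates the continuation:
    cut = patch._replace(continuation=None, last=33000)
    assert cut.continuation is None and patch.continuation == 'token'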


@@ -1,19 +1,19 @@
import aiohttp
import asyncio
import json
-from pytchat.tool import parser
+from pytchat.tool.download import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
-from pytchat.tool import asyncdl
+from pytchat.tool.download import asyncdl
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
-def test_asyncdl_split(mocker):
+def test_asyncdl_split():
ret = asyncdl._split(0,1000,1)
assert ret == [0]


@@ -3,60 +3,73 @@ import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
-from pytchat.tool import duplcheck
-from pytchat.tool import parser
-from pytchat.tool.block import Block
-from pytchat.tool.duplcheck import _dump
+from pytchat.tool.download import duplcheck
+from pytchat.tool.download import parser
+from pytchat.tool.download.block import Block
+from pytchat.tool.download.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
-def load_chatdata(filename):
+def test_overlap():
+    """
+    test overlap data
+    operation : [0] [2] [3] [4] -> last : align to end
+                [1] , [5]       -> no change
+    """
+    def load_chatdata(filename):
+        return parser.parse(
+            json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
+        )[1]
+
+    blocks = (
+        Block(first = 0,     last = 12771, end = 9890,  chat_data = load_chatdata("dp0-0.json")),
+        Block(first = 9890,  last = 15800, end = 20244, chat_data = load_chatdata("dp0-1.json")),
+        Block(first = 20244, last = 45146, end = 32476, chat_data = load_chatdata("dp0-2.json")),
+        Block(first = 32476, last = 50520, end = 41380, chat_data = load_chatdata("dp0-3.json")),
+        Block(first = 41380, last = 62875, end = 52568, chat_data = load_chatdata("dp0-4.json")),
+        Block(first = 52568, last = 62875, end = 54000, chat_data = load_chatdata("dp0-5.json"), is_last = True)
+    )
+    result = duplcheck.remove_overlap(blocks)
+    # dp0-0.json has an item whose offset time is 9890 (equal to
+    # block[0].end == block[1].first), but it must be aligned to the
+    # closest smaller value, 9779.
+    assert result[0].last == 9779
+    assert result[1].last == 15800
+    assert result[2].last == 32196
+    assert result[3].last == 41116
+    assert result[4].last == 52384
+    # the last block is always kept in the result.
+    assert result[5].last == 62875
+
+def test_duplicate_head():
+    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
        )[1]

-def test_overwrap(mocker):
-    """
-    test overwrap data
-    operation : [0] , [1] -> discard [1]
-                [0] , [2] , [3] -> discard [2]
-                [3] , [4] , [5] -> discard [4]
-    result    : [0] , [3] , [5]
-    """
-    blocks = (
-        Block(first = 0,     last = 38771, chat_data = load_chatdata("dp0-0.json")),
-        Block(first = 9890,  last = 38771, chat_data = load_chatdata("dp0-1.json")),
-        Block(first = 20244, last = 45146, chat_data = load_chatdata("dp0-2.json")),
-        Block(first = 32476, last = 60520, chat_data = load_chatdata("dp0-3.json")),
-        Block(first = 41380, last = 62875, chat_data = load_chatdata("dp0-4.json")),
-        Block(first = 52568, last = 62875, chat_data = load_chatdata("dp0-5.json"))
-    )
-    result = duplcheck.overwrap(blocks)
-    assert len(result) == 3
-    assert result[0].first == blocks[0].first
-    assert result[0].last == blocks[0].last
-    assert result[1].first == blocks[3].first
-    assert result[1].last == blocks[3].last
-    assert result[2].first == blocks[5].first
-    assert result[2].last == blocks[5].last

-def test_duplicate_head(mocker):
    """
    test duplicate head data
    operation : [0] , [1] -> discard [0]
                [1] , [2] -> discard [1]
                [2] , [3] -> append [2]
                [3] , [4] -> discard [3]
                [4] , [5] -> append [4]
                append [5]
    result    : [2] , [4] , [5]
    """
    # chat data offsets are ignored.
blocks = (
Block(first = 0, last = 2500, chat_data = load_chatdata("dp0-0.json")),
Block(first = 0, last =38771, chat_data = load_chatdata("dp0-1.json")),
@@ -66,7 +79,7 @@ def test_duplicate_head(mocker):
Block(first = 52568, last =62875, chat_data = load_chatdata("dp0-5.json"))
)
_dump(blocks)
-    result = duplcheck.duplicate_head(blocks)
+    result = duplcheck.remove_duplicate_head(blocks)
assert len(result) == 3
assert result[0].first == blocks[2].first
@@ -76,19 +89,23 @@ def test_duplicate_head(mocker):
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
-def test_duplicate_tail(mocker):
+def test_duplicate_tail():
    """
    test duplicate tail data
    operation : append [0]
                [0] , [1] -> discard [1]
                [1] , [2] -> append [2]
                [2] , [3] -> discard [3]
                [3] , [4] -> append [4]
                [4] , [5] -> discard [5]
    result    : [0] , [2] , [4]
    """
+    def load_chatdata(filename):
+        return parser.parse(
+            json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
+        )[1]
+
+    # chat data offsets are ignored.
blocks = (
Block(first = 0,last = 2500, chat_data=load_chatdata("dp0-0.json")),
Block(first = 1500,last = 2500, chat_data=load_chatdata("dp0-1.json")),
@@ -98,7 +115,7 @@ def test_duplicate_tail(mocker):
Block(first = 52568,last = 62875, chat_data=load_chatdata("dp0-5.json"))
)
-    result = duplcheck.duplicate_tail(blocks)
+    result = duplcheck.remove_duplicate_tail(blocks)
_dump(result)
assert len(result) == 3
assert result[0].first == blocks[0].first

tests/test_patch.py (new file, 232 lines)

@@ -0,0 +1,232 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.patch import Patch, fill, split, set_patch
from pytchat.tool.download.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_patch/"+filename))
)[1]
def test_split_0():
"""
Normal case
@parent_block (# = already downloaded)
first last end
|########----------------------------------------|
@child_block
first = last = 0 end=parent_end
---------------------------------------------------|
@fetched patch
|-- patch --|
|
|
V
@parent_block
first last end (after split)
|########------------|
@child_block
first last end
|###########---------------|
@fetched patch
|-- patch --|
"""
parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert child.continuation == 'patch'
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert parent.during_split == False
assert child.during_split == False
def test_split_1():
"""patch.first <= parent_block.last
While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and dlworker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|#####|---------------------|
^
@child_block
first = last = 0 end=parent_end
---------------------------------------------------|
@fetched patch
|-- patch --|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end
|###########################|--------------------|
@child_block
    .............. -> discard all data
"""
parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert parent.last == 33000 #no change
assert parent.end == 60000 #no change
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == True #exclude during_split sequence
def test_split_2():
"""child_block.end < patch.last:
Case the last offset of patch exceeds child_block.end.
In this case, remove overlapped data of patch.
~~~~~~ before ~~~~~~
@parent_block (# = already downloaded)
first last end (before split)
|########------------------------------|
@child_block
first = last = 0 end=parent_end
-----------------------------------------|
continuation:succeed from patch
@fetched patch
|-------- patch --------|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (after split)
|########------------|
@child_block old patch.end
first last=end |
|#################|...... cut extra data.
^
continuation : None (download complete)
@fetched patch
|-------- patch --------|
"""
parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert child.continuation is None
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == False
def test_split_none():
"""patch.last <= parent_block.last
While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and dlworker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|###################|-------|
^
@child_block
first = last = 0 end=parent_end
---------------------------------------------------|
@fetched patch
|-- patch --|
    patch.last < parent_block.last.
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (before split)
|########################################|-------|
.
@child_block
............ -> discard all data.
"""
parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert parent.last == 40000 #no change
assert parent.end == 60000 #no change
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == True #exclude during_split sequence

(Several more large file diffs suppressed.)

New vendored test data files (diffs suppressed because they are too large):

tests/testdata/dl_patch/pt0-0.json (new file, 3078 lines)
tests/testdata/dl_patch/pt0-1.json (new file, 3078 lines)
tests/testdata/dl_patch/pt0-3.json (new file, 3078 lines)
tests/testdata/dl_patch/pt0-4.json (new file, 3078 lines)
tests/testdata/dl_patch/pt0-5.json (new file, 3078 lines)