Merge branch 'feature/downloader' into develop

taizan-hokuto
2020-02-16 21:38:19 +09:00
34 changed files with 55145 additions and 2 deletions

MANIFEST.in (modified)

@@ -1,5 +1,5 @@
include requirements.txt
include requirements_test.txt
-include README.MD
+include README.md
global-exclude tests/*
global-exclude pytchat/testrun*.py

error.json (new file, 1 line)

File diff suppressed because one or more lines are too long

pytchat/config/__init__.py (modified)

@@ -4,7 +4,7 @@ from . import mylogger
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}

-def logger(module_name: str, loglevel = None):
+def logger(module_name: str, loglevel = logging.DEBUG):
    module_logger = mylogger.get_logger(module_name, loglevel = loglevel)
    return module_logger

pytchat/exceptions.py (modified)

@@ -41,3 +41,6 @@ class IllegalFunctionCall(Exception):
    Exception raised when get() is called illegally.
    '''
    pass

+class InvalidVideoIdException(Exception):
+    pass

pytchat/tool/__init__.py (new file, empty)
pytchat/tool/download/__init__.py (new file, empty)

pytchat/tool/download/asyncdl.py (new file)

@@ -0,0 +1,132 @@
import aiohttp
import asyncio
import json
from . import parser
from . block import Block
from . dlworker import DownloadWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam
from concurrent.futures import CancelledError
from urllib.parse import quote

headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
             "get_live_chat_replay?continuation="

def _split(start, end, count, min_interval_sec = 120):
    """
    Split the range from `start` to `end` into `count` pieces,
    and return the beginning offset of each piece.
    `count` is adjusted so that the length of each piece
    is no smaller than `min_interval_sec`.

    Returns:
    --------
    List of the offsets of each block's first chat data.
    """
    if not (isinstance(start, int) or isinstance(start, float)) or \
       not (isinstance(end, int) or isinstance(end, float)):
        raise ValueError("start/end must be int or float")
    if not isinstance(count, int):
        raise ValueError("count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")
    if (end - start) / count < min_interval_sec:
        count = int((end - start) / min_interval_sec)
        if count == 0:
            count = 1
    interval = (end - start) / count
    if count == 1:
        return [start]
    return sorted(list(set(
        [int(start + interval * j) for j in range(count)])))

def ready_blocks(video_id, duration, div, callback):
    if div <= 0:
        raise ValueError

    async def _get_blocks(video_id, duration, div, callback):
        async with aiohttp.ClientSession() as session:
            tasks = [_create_block(session, video_id, seektime, callback)
                     for seektime in _split(-1, duration, div)]
            return await asyncio.gather(*tasks)

    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime = seektime)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        async with session.get(url, headers = headers) as resp:
            text = await resp.text()
        next_continuation, actions = parser.parse(json.loads(text))
        if actions:
            first = parser.get_offset(actions[0])
            last = parser.get_offset(actions[-1])
            if callback:
                callback(actions, last - first)
            return Block(
                continuation = next_continuation,
                chat_data = actions,
                first = first,
                last = last
            )

    """
    fetch initial blocks.
    """
    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
    return blocks

def download_patch(callback, blocks, video_id):
    async def _allocate_workers():
        workers = [
            DownloadWorker(
                fetch = _fetch, block = block,
                blocks = blocks, video_id = video_id
            )
            for block in blocks
        ]
        async with aiohttp.ClientSession() as session:
            tasks = [worker.run(session) for worker in workers]
            return await asyncio.gather(*tasks)

    async def _fetch(continuation, session) -> Patch:
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        async with session.get(url, headers = config.headers) as resp:
            chat_json = await resp.text()
        continuation, actions = parser.parse(json.loads(chat_json))
        if actions:
            last = parser.get_offset(actions[-1])
            first = parser.get_offset(actions[0])
            if callback:
                callback(actions, last - first)
            return Patch(actions, continuation, first, last)
        return Patch()

    """
    allocate workers and assign blocks.
    """
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(_allocate_workers())
    except CancelledError:
        pass

async def _shutdown():
    print("\nshutdown...")
    tasks = [t for t in asyncio.all_tasks()
             if t is not asyncio.current_task()]
    for task in tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

def cancel():
    loop = asyncio.get_event_loop()
    loop.create_task(_shutdown())
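
A quick illustration of the splitting rule (the expected values match test_asyncdl_split later in this diff): with the default 120-second minimum interval, a 500-unit range cannot hold five pieces, so count is reduced to int(500 / 120) = 4.

>>> from pytchat.tool.download import asyncdl
>>> asyncdl._split(0, 1000, 5)   # 1000 / 5 = 200 >= 120, keep 5 pieces
[0, 200, 400, 600, 800]
>>> asyncdl._split(0, 500, 5)    # 500 / 5 = 100 < 120, count becomes 4
[0, 125, 250, 375]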

pytchat/tool/download/block.py (new file)

@@ -0,0 +1,57 @@
from . import parser

class Block:
    """Block object represents something like a box
    for joining chunks of chat data.

    Parameters:
    ----------
    first : int :
        videoOffsetTimeMs of the first chat_data
        (chat_data[0])

    last : int :
        videoOffsetTimeMs of the last chat_data
        (chat_data[-1]).
        This value increases as fetching chat data progresses.

    end : int :
        target videoOffsetTimeMs of the last chat data to download;
        equals the first videoOffsetTimeMs of the next block.
        When the download worker reaches this offset, it stops downloading.

    continuation : str :
        continuation param of the last chat data.

    chat_data : list

    done : bool :
        whether this block has been downloaded.

    remaining : int :
        amount of data remaining to download;
        equals end - last.

    is_last : bool :
        whether this block is the last one in the block list.

    during_split : bool :
        whether this block is currently being split.
        While True, this block is excluded from the split
        target search, so it is not split twice.
    """

    __slots__ = ['first', 'last', 'end', 'continuation', 'chat_data',
                 'remaining', 'done', 'is_last', 'during_split']

    def __init__(self, first = 0, last = 0, end = 0,
                 continuation = '', chat_data = None, is_last = False,
                 during_split = False):
        self.first = first
        self.last = last
        self.end = end
        self.continuation = continuation
        # use None as the default to avoid sharing one mutable list
        # between instances
        self.chat_data = chat_data if chat_data is not None else []
        self.done = False
        self.remaining = self.end - self.last
        self.is_last = is_last
        self.during_split = during_split
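
A minimal sketch of the bookkeeping, with made-up offsets in milliseconds. Note that remaining is computed once at construction; set_patch() in patch.py keeps it up to date afterwards.

>>> from pytchat.tool.download.block import Block
>>> b = Block(first=0, last=9779, end=20244, continuation='token')
>>> b.remaining        # end - last: chat still to fetch for this block
10465
>>> b.done
False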

pytchat/tool/download/dlworker.py (new file)

@@ -0,0 +1,87 @@
from typing import Tuple
from . import parser
from . block import Block
from . patch import Patch, fill, split
from ... paramgen import arcparam

class DownloadWorker:
    """
    DownloadWorker associates a download session with a block.
    When a worker finishes downloading its block, the largest
    unfinished block is split, and the new piece is assigned
    to the now-free worker.

    Parameters
    ----------
    fetch : func :
        download function of asyncdl

    block : Block :
        Block object that holds chat_data

    blocks : list :
        list of Block objects

    video_id : str :

    parent_block : Block :
        the block from which the current block was split.
    """

    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']

    def __init__(self, fetch, block, blocks, video_id):
        self.block = block
        self.fetch = fetch
        self.blocks = blocks
        self.video_id = video_id
        self.parent_block = None

    async def run(self, session):
        while self.block.continuation:
            patch = await self.fetch(
                self.block.continuation, session)
            if patch.continuation is None:
                """TODO : make the worker assigned to the last block
                able to take on more work where possible.
                """
                break
            if self.parent_block:
                split(self.parent_block, self.block, patch)
                self.parent_block = None
            else:
                fill(self.block, patch)
            if self.block.continuation is None:
                """finished downloading this block"""
                self.block.done = True
                self.block = _search_new_block(self)

def _search_new_block(worker) -> Block:
    index, undone_block = _get_undone_block(worker.blocks)
    if undone_block is None:
        return Block(continuation = None)
    mean = (undone_block.last + undone_block.end) / 2
    continuation = arcparam.getparam(worker.video_id, seektime = mean / 1000)
    worker.parent_block = undone_block
    worker.parent_block.during_split = True
    new_block = Block(
        end = undone_block.end,
        chat_data = [],
        continuation = continuation,
        during_split = True,
        is_last = worker.parent_block.is_last)
    worker.blocks.insert(index + 1, new_block)
    return new_block

def _get_undone_block(blocks) -> Tuple[int, Block]:
    min_interval_ms = 120000
    max_remaining = 0
    undone_block = None
    index_undone_block = 0
    for index, block in enumerate(blocks):
        if block.done or block.during_split:
            continue
        remaining = block.remaining
        if remaining > max_remaining and remaining > min_interval_ms:
            index_undone_block = index
            undone_block = block
            max_remaining = remaining
    return index_undone_block, undone_block
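
The search above picks the unfinished block with the most chat left to fetch, skipping blocks that are done, currently being split, or have no more than 120000 ms remaining. A small sketch with synthetic offsets:

>>> from pytchat.tool.download.block import Block
>>> from pytchat.tool.download.dlworker import _get_undone_block
>>> blocks = [
...     Block(last=100000, end=190000),   # remaining  90000 ms: under threshold, skipped
...     Block(last=200000, end=500000),   # remaining 300000 ms: largest, selected
...     Block(last=300000, end=450000),   # remaining 150000 ms
... ]
>>> _get_undone_block(blocks)[0]          # index of the selected block
1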

pytchat/tool/download/downloader.py (new file)

@@ -0,0 +1,89 @@
from . import asyncdl
from . import duplcheck
from . import parser
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException

logger = config.logger(__name__)
headers = config.headers

class Downloader:
    def __init__(self, video_id, duration, div, callback):
        if not isinstance(div, int) or div < 1:
            raise ValueError('div must be a positive integer.')
        elif div > 10:
            div = 10
        if not isinstance(duration, int) or duration < 1:
            raise ValueError('duration must be a positive integer.')
        self.video_id = video_id
        self.duration = duration
        self.div = div
        self.callback = callback
        self.blocks = []

    def _ready_blocks(self):
        blocks = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
        self.blocks = [block for block in blocks if block]
        return self

    def _remove_duplicate_head(self):
        self.blocks = duplcheck.remove_duplicate_head(self.blocks)
        return self

    def _set_block_end(self):
        for i in range(len(self.blocks) - 1):
            self.blocks[i].end = self.blocks[i + 1].first
        self.blocks[-1].end = self.duration * 1000
        self.blocks[-1].is_last = True
        return self

    def _remove_overlap(self):
        self.blocks = duplcheck.remove_overlap(self.blocks)
        return self

    def _download_blocks(self):
        asyncdl.download_patch(self.callback, self.blocks, self.video_id)
        return self

    def _remove_duplicate_tail(self):
        self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
        return self

    def _combine(self):
        ret = []
        for block in self.blocks:
            ret.extend(block.chat_data)
        return ret

    def download(self):
        return (
            self._ready_blocks()
                ._remove_duplicate_head()
                ._set_block_end()
                ._remove_overlap()
                ._download_blocks()
                ._remove_duplicate_tail()
                ._combine()
        )

def download(video_id, div = 1, callback = None, processor = None):
    duration = 0
    try:
        duration = VideoInfo(video_id).get("duration")
    except InvalidVideoIdException:
        raise
    if duration == 0:
        print("video is live.")
        return []
    data = Downloader(video_id, duration, div, callback).download()
    if processor is None:
        return data
    return processor.process(
        [{'video_id': None, 'timeout': 1, 'chatdata':
            [action["replayChatItemAction"]["actions"][0] for action in data]}]
    )

def cancel():
    asyncdl.cancel()
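
A hedged usage sketch of the module-level download() entry point; it assumes this file is importable as pytchat.tool.download.downloader, and "_video_id_" stands in for a real archived-stream id:

from pytchat.tool.download import downloader

def progress(actions, fetched_ms):
    # receives each fetched chunk and the chat time span it covers
    print(f"+{len(actions)} chat items (~{fetched_ms} ms)")

# returns a list of raw replayChatItemAction objects; [] if the video is live
data = downloader.download("_video_id_", div=10, callback=progress)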

pytchat/tool/download/duplcheck.py (new file)

@@ -0,0 +1,153 @@
from . import parser

def check_duplicate(chatdata):
    max_range = len(chatdata) - 1
    tbl_offset = [None] * max_range
    tbl_id = [None] * max_range
    tbl_type = [None] * max_range

    def create_table(chatdata, max_range):
        for i in range(max_range):
            tbl_offset[i] = parser.get_offset(chatdata[i])
            tbl_id[i] = parser.get_id(chatdata[i])
            tbl_type[i] = parser.get_type(chatdata[i])

    def is_duplicate(i, j):
        return (
            tbl_offset[i] == tbl_offset[j]
            and tbl_id[i] == tbl_id[j]
            and tbl_type[i] == tbl_type[j]
        )

    print("creating table...")
    create_table(chatdata, max_range)
    print("searching duplicate data...")
    return [{"i": {
                "index": i, "id": parser.get_id(chatdata[i]),
                "offsetTime": parser.get_offset(chatdata[i]),
                "type": parser.get_type(chatdata[i])
            },
            "j": {
                "index": j, "id": parser.get_id(chatdata[j]),
                "offsetTime": parser.get_offset(chatdata[j]),
                "type": parser.get_type(chatdata[j])
            }}
            for i in range(max_range) for j in range(i + 1, max_range)
            if is_duplicate(i, j)]

def check_duplicate_offset(chatdata):
    max_range = len(chatdata)
    tbl_offset = [None] * max_range
    tbl_id = [None] * max_range
    tbl_type = [None] * max_range

    def create_table(chatdata, max_range):
        for i in range(max_range):
            tbl_offset[i] = parser.get_offset(chatdata[i])
            tbl_id[i] = parser.get_id(chatdata[i])
            tbl_type[i] = parser.get_type(chatdata[i])

    def is_duplicate(i, j):
        return (
            tbl_offset[i] == tbl_offset[j]
            and tbl_id[i] == tbl_id[j]
        )

    print("creating table...")
    create_table(chatdata, max_range)
    print("searching duplicate data...")
    return [{
                "index": i, "id": tbl_id[i],
                "offsetTime": tbl_offset[i],
                "type": tbl_type[i]
            }
            for i in range(max_range - 1)
            if is_duplicate(i, i + 1)]

def remove_duplicate_head(blocks):
    if len(blocks) == 1:
        return blocks

    def is_duplicate_head(index):
        if len(blocks[index].chat_data) == 0:
            return True
        elif len(blocks[index + 1].chat_data) == 0:
            return False
        id_0 = parser.get_id(blocks[index].chat_data[0])
        id_1 = parser.get_id(blocks[index + 1].chat_data[0])
        type_0 = parser.get_type(blocks[index].chat_data[0])
        type_1 = parser.get_type(blocks[index + 1].chat_data[0])
        return (
            blocks[index].first == blocks[index + 1].first
            and id_0 == id_1
            and type_0 == type_1
        )

    ret = [blocks[i] for i in range(len(blocks) - 1)
           if (len(blocks[i].chat_data) > 0 and
               not is_duplicate_head(i))]
    ret.append(blocks[-1])
    return ret

def remove_duplicate_tail(blocks):
    if len(blocks) == 1:
        return blocks

    def is_duplicate_tail(index):
        if len(blocks[index].chat_data) == 0:
            return True
        elif len(blocks[index - 1].chat_data) == 0:
            return False
        id_0 = parser.get_id(blocks[index - 1].chat_data[-1])
        id_1 = parser.get_id(blocks[index].chat_data[-1])
        type_0 = parser.get_type(blocks[index - 1].chat_data[-1])
        type_1 = parser.get_type(blocks[index].chat_data[-1])
        return (
            blocks[index - 1].last == blocks[index].last
            and id_0 == id_1
            and type_0 == type_1
        )

    ret = [blocks[i] for i in range(len(blocks))
           if i == 0 or not is_duplicate_tail(i)]
    return ret

def remove_overlap(blocks):
    """
    Fix overlapping blocks after ready_blocks().
    Align the last offset of each block to the first offset
    of the next block (which equals the `end` offset of the block).
    """
    if len(blocks) == 1:
        return blocks
    for block in blocks:
        if block.is_last:
            break
        if len(block.chat_data) == 0:
            continue
        block_end = block.end
        if block.last >= block_end:
            for line in reversed(block.chat_data):
                if parser.get_offset(line) < block_end:
                    break
                block.chat_data.pop()
            block.last = parser.get_offset(line)
            block.remaining = 0
            block.done = True
            block.continuation = None
    return blocks

def _dump(blocks):
    print("----------      first       last        end ---")
    for i, block in enumerate(blocks):
        print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")

pytchat/tool/download/parser.py (new file)

@@ -0,0 +1,54 @@
import json
from ... import config
from ... exceptions import (
    ResponseContextError,
    NoContentsException,
    NoContinuationsException)

logger = config.logger(__name__)

def parse(jsn):
    """
    Parse replay chat data.

    Parameter:
    ----------
    jsn : dict
        JSON of replay chat data.

    Returns:
    --------
    continuation : str
    actions : list
    """
    if jsn is None:
        raise ValueError("parameter JSON is None")
    if jsn['response']['responseContext'].get('errors'):
        raise ResponseContextError(
            'video_id is invalid or private/deleted.')
    contents = jsn['response'].get('continuationContents')
    if contents is None:
        raise NoContentsException('No chat data.')
    cont = contents['liveChatContinuation']['continuations'][0]
    if cont is None:
        raise NoContinuationsException('No Continuation')
    metadata = cont.get('liveChatReplayContinuationData')
    if metadata:
        continuation = metadata.get("continuation")
        actions = contents['liveChatContinuation'].get('actions')
        return continuation, actions
    return None, []

def get_offset(item):
    return int(item['replayChatItemAction']["videoOffsetTimeMsec"])

def get_id(item):
    return list((list(item['replayChatItemAction']["actions"][0].values()
        )[0])['item'].values())[0].get('id')

def get_type(item):
    return list((list(item['replayChatItemAction']["actions"][0].values()
        )[0])['item'].keys())[0]
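
For reference, a synthetic action showing the nesting these helpers walk; the field names follow the live_chat_replay format, but the values here are made up:

>>> from pytchat.tool.download.parser import get_offset, get_id, get_type
>>> item = {'replayChatItemAction': {
...     'videoOffsetTimeMsec': '9779',
...     'actions': [{'addChatItemAction': {
...         'item': {'liveChatTextMessageRenderer': {'id': 'ABC'}}}}]}}
>>> get_offset(item)
9779
>>> get_type(item)
'liveChatTextMessageRenderer'
>>> get_id(item)
'ABC'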

pytchat/tool/download/patch.py (new file)

@@ -0,0 +1,54 @@
from . import parser
from . block import Block
from typing import NamedTuple

class Patch(NamedTuple):
    """
    Patch represents a chunk of chat data
    fetched by asyncdl.download_patch._fetch().
    """
    chats : list = []
    continuation : str = None
    first : int = None
    last : int = None

def fill(block: Block, patch: Patch):
    block_end = block.end
    if patch.last < block_end or block.is_last:
        set_patch(block, patch)
        return
    for line in reversed(patch.chats):
        line_offset = parser.get_offset(line)
        if line_offset < block_end:
            break
        patch.chats.pop()
    set_patch(block, patch._replace(
        continuation = None,
        last = line_offset
    ))
    block.remaining = 0
    block.done = True

def split(parent_block: Block, child_block: Block, patch: Patch):
    parent_block.during_split = False
    """the patch overlaps with parent_block"""
    if patch.first <= parent_block.last:
        child_block.continuation = None
        '''Leave child_block.during_split == True
        so that it stays excluded from the split target search.'''
        return
    child_block.during_split = False
    child_block.first = patch.first
    parent_block.end = patch.first
    fill(child_block, patch)

def set_patch(block: Block, patch: Patch):
    block.continuation = patch.continuation
    block.chat_data.extend(patch.chats)
    block.last = patch.last
    block.remaining = block.end - block.last
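
A sketch of how fill() trims a patch that runs past block.end, reusing the synthetic item shape from the parser example above (offsets in ms):

>>> from pytchat.tool.download.block import Block
>>> from pytchat.tool.download.patch import Patch, fill
>>> def item(ms):
...     return {'replayChatItemAction': {
...         'videoOffsetTimeMsec': str(ms),
...         'actions': [{'addChatItemAction': {
...             'item': {'liveChatTextMessageRenderer': {'id': str(ms)}}}}]}}
>>> block = Block(last=4000, end=30000, continuation='c1')
>>> patch = Patch(chats=[item(t) for t in (25000, 29000, 31000)],
...               continuation='c2', first=25000, last=31000)
>>> fill(block, patch)          # 31000 >= block.end, so that item is cut
>>> block.last, block.done, block.continuation
(29000, True, None)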

pytchat/tool/videoinfo.py (new file, 42 lines)

@@ -0,0 +1,42 @@
import json
import re
import requests
from .. import config
from .. import util
from ..exceptions import InvalidVideoIdException

headers = config.headers
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")

class VideoInfo:
    def __init__(self, video_id):
        self.video_id = video_id
        self.info = self._get_info(video_id)

    def _get_info(self, video_id):
        url = f"https://www.youtube.com/embed/{video_id}"
        resp = requests.get(url, headers = headers)
        resp.raise_for_status()
        return self._parse(resp.text)

    def _parse(self, html):
        result = re.search(pattern, html)
        res = json.loads(result.group(1))
        response = res["args"].get("embedded_player_response")
        if response is None:
            raise InvalidVideoIdException("Invalid video id.")
        renderer = (json.loads(response))["embedPreview"]["thumbnailPreviewRenderer"]
        return {
            "duration": int(renderer["videoDurationSeconds"])
                if renderer.get("videoDurationSeconds") else 0,
            "title": [''.join(run["text"]) for run in renderer["title"]["runs"]][0]
                if renderer.get("title") else None,
            "channelId": renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]
                ["channelThumbnailEndpoint"]["channelThumbnailEndpoint"]
                ["urlEndpoint"]["urlEndpoint"]["url"][9:]
                if renderer.get("videoDetails") else None,
            "authorProfileImage": renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]
                ["channelThumbnail"]["thumbnails"][0]["url"]
                if renderer.get("videoDetails") else None,
            "thumbnail": renderer["defaultThumbnail"]["thumbnails"][2]["url"]
                if renderer.get("defaultThumbnail") else None,
            "channelName": renderer["videoDetails"]["embeddedPlayerOverlayVideoDetailsRenderer"]
                ["expandedRenderer"]["embeddedPlayerOverlayVideoDetailsExpandedRenderer"]
                ["title"]["runs"][0]["text"]
                if renderer.get("videoDetails") else None,
            "movingThumbnail": renderer["movingThumbnail"]["thumbnails"][0]["url"]
                if renderer.get("movingThumbnail") else None
        }

    def get(self, item):
        return self.info.get(item)
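
A hedged usage sketch with a placeholder video id; as Downloader above relies on, get("duration") comes back as 0 while a stream is still live:

from pytchat.tool.videoinfo import VideoInfo

info = VideoInfo("_video_id_")   # raises InvalidVideoIdException on bad ids
print(info.get("title"), info.get("duration"))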

tests/test_dl_asyncdl.py (new file, 77 lines)

@@ -0,0 +1,77 @@
import aiohttp
import asyncio
import json
from pytchat.tool.download import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
from pytchat.tool.download import asyncdl

def _open_file(path):
    with open(path, mode = 'r', encoding = 'utf-8') as f:
        return f.read()

def test_asyncdl_split():
    ret = asyncdl._split(0, 1000, 1)
    assert ret == [0]
    ret = asyncdl._split(1000, 1000, 10)
    assert ret == [1000]
    ret = asyncdl._split(0, 1000, 5)
    assert ret == [0, 200, 400, 600, 800]
    ret = asyncdl._split(10.5, 700.3, 5)
    assert ret == [10, 148, 286, 424, 562]
    ret = asyncdl._split(0, 500, 5)
    assert ret == [0, 125, 250, 375]
    ret = asyncdl._split(0, 500, 500)
    assert ret == [0, 125, 250, 375]
    ret = asyncdl._split(-1, 1000, 5)
    assert ret == [-1, 199, 399, 599, 799]

    """invalid argument order"""
    try:
        ret = asyncdl._split(500, 0, 5)
        assert False
    except ValueError:
        assert True

    """invalid count"""
    try:
        ret = asyncdl._split(0, 500, -1)
        assert False
    except ValueError:
        assert True
    try:
        ret = asyncdl._split(0, 500, 0)
        assert False
    except ValueError:
        assert True

    """invalid argument type"""
    try:
        ret = asyncdl._split(0, 5000, 5.2)
        assert False
    except ValueError:
        assert True
    try:
        ret = asyncdl._split(0, 5000, "test")
        assert False
    except ValueError:
        assert True
    try:
        ret = asyncdl._split([0, 1], 5000, 5)
        assert False
    except ValueError:
        assert True

tests/test_dl_duplcheck.py (new file, 128 lines)

@@ -0,0 +1,128 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.duplcheck import _dump

def _open_file(path):
    with open(path, mode = 'r', encoding = 'utf-8') as f:
        return f.read()

def test_overlap():
    """
    test overlapping data

    operation : [0] [2] [3] [4] -> last : align to end
                [1] , [5]       -> no change
    """
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/" + filename))
        )[1]

    blocks = (
        Block(first = 0,     last = 12771, end = 9890,  chat_data = load_chatdata("dp0-0.json")),
        Block(first = 9890,  last = 15800, end = 20244, chat_data = load_chatdata("dp0-1.json")),
        Block(first = 20244, last = 45146, end = 32476, chat_data = load_chatdata("dp0-2.json")),
        Block(first = 32476, last = 50520, end = 41380, chat_data = load_chatdata("dp0-3.json")),
        Block(first = 41380, last = 62875, end = 52568, chat_data = load_chatdata("dp0-4.json")),
        Block(first = 52568, last = 62875, end = 54000, chat_data = load_chatdata("dp0-5.json"), is_last = True)
    )
    result = duplcheck.remove_overlap(blocks)
    # dp0-0.json has an item whose offset time is 9890
    # (equal to block[0].end = block[1].first), but the block
    # must be aligned to the closest smaller offset: 9779.
    assert result[0].last == 9779
    assert result[1].last == 15800
    assert result[2].last == 32196
    assert result[3].last == 41116
    assert result[4].last == 52384
    # the last block is always kept unchanged.
    assert result[5].last == 62875

def test_duplicate_head():
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/head/" + filename))
        )[1]

    """
    test duplicate head data
    operation : [0] , [1] -> discard [0]
                [1] , [2] -> discard [1]
                [2] , [3] -> append [2]
                [3] , [4] -> discard [3]
                [4] , [5] -> append [4]
                append [5]

    result : [2] , [4] , [5]
    """
    # offsets in the chat data are ignored.
    blocks = (
        Block(first = 0,     last = 2500,  chat_data = load_chatdata("dp0-0.json")),
        Block(first = 0,     last = 38771, chat_data = load_chatdata("dp0-1.json")),
        Block(first = 0,     last = 45146, chat_data = load_chatdata("dp0-2.json")),
        Block(first = 20244, last = 60520, chat_data = load_chatdata("dp0-3.json")),
        Block(first = 20244, last = 62875, chat_data = load_chatdata("dp0-4.json")),
        Block(first = 52568, last = 62875, chat_data = load_chatdata("dp0-5.json"))
    )
    _dump(blocks)
    result = duplcheck.remove_duplicate_head(blocks)

    assert len(result) == 3
    assert result[0].first == blocks[2].first
    assert result[0].last == blocks[2].last
    assert result[1].first == blocks[4].first
    assert result[1].last == blocks[4].last
    assert result[2].first == blocks[5].first
    assert result[2].last == blocks[5].last

def test_duplicate_tail():
    """
    test duplicate tail data
    operation : append [0]
                [0] , [1] -> discard [1]
                [1] , [2] -> append [2]
                [2] , [3] -> discard [3]
                [3] , [4] -> append [4]
                [4] , [5] -> discard [5]

    result : [0] , [2] , [4]
    """
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/dl_duplcheck/head/" + filename))
        )[1]

    # offsets in the chat data are ignored.
    blocks = (
        Block(first = 0,     last = 2500,  chat_data = load_chatdata("dp0-0.json")),
        Block(first = 1500,  last = 2500,  chat_data = load_chatdata("dp0-1.json")),
        Block(first = 10000, last = 45146, chat_data = load_chatdata("dp0-2.json")),
        Block(first = 20244, last = 45146, chat_data = load_chatdata("dp0-3.json")),
        Block(first = 20244, last = 62875, chat_data = load_chatdata("dp0-4.json")),
        Block(first = 52568, last = 62875, chat_data = load_chatdata("dp0-5.json"))
    )
    result = duplcheck.remove_duplicate_tail(blocks)
    _dump(result)

    assert len(result) == 3
    assert result[0].first == blocks[0].first
    assert result[0].last == blocks[0].last
    assert result[1].first == blocks[2].first
    assert result[1].last == blocks[2].last
    assert result[2].first == blocks[4].first
    assert result[2].last == blocks[4].last

tests/test_patch.py (new file, 232 lines)

@@ -0,0 +1,232 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.patch import Patch, fill, split, set_patch
from pytchat.tool.download.duplcheck import _dump

def _open_file(path):
    with open(path, mode = 'r', encoding = 'utf-8') as f:
        return f.read()

def load_chatdata(filename):
    return parser.parse(
        json.loads(_open_file("tests/testdata/dl_patch/" + filename))
    )[1]

def test_split_0():
    """
    Normal case

    @parent_block (# = already downloaded)
    first     last                                    end
      |########----------------------------------------|

    @child_block
              first = last = 0               end = parent_end
      ---------------------------------------------------|

    @fetched patch
                  |-- patch --|
                        |
                        V
    @parent_block
    first     last       end (after split)
      |########------------|

    @child_block
              first        last              end
                  |###########---------------|

    @fetched patch
                  |-- patch --|
    """
    parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
    child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
    patch = Patch(chats=load_chatdata('pt0-5.json'),
                  first=32500, last=34000, continuation='patch')

    split(parent, child, patch)

    assert child.continuation == 'patch'
    assert parent.last < child.first
    assert parent.end == child.first
    assert child.first < child.last
    assert child.last < child.end
    assert parent.during_split == False
    assert child.during_split == False

def test_split_1():
    """patch.first <= parent_block.last

    While awaiting run() -> asyncdl._fetch(),
    downloading of the parent_block proceeds,
    and parent_block.last exceeds patch.first.
    In this case, the fetched patch is discarded entirely,
    and the worker searches for another block to process.

    ~~~~~~ before ~~~~~~
                        patch.first
    first                  |  last                   end
      |####################|#####|---------------------|

    @child_block
              first = last = 0               end = parent_end
      ---------------------------------------------------|

    @fetched patch
                          |-- patch --|
                                |
                                V
    ~~~~~~ after ~~~~~~
    @parent_block
    first                       last                 end
      |###########################|--------------------|

    @child_block
      ..............  -> discard all data
    """
    parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
    child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
    patch = Patch(chats=load_chatdata('pt0-5.json'),
                  first=32500, last=34000, continuation='patch')

    split(parent, child, patch)

    assert parent.last == 33000   # no change
    assert parent.end == 60000    # no change
    assert child.continuation is None
    assert parent.during_split == False
    assert child.during_split == True   # excluded from the split target search

def test_split_2():
    """child_block.end < patch.last :
    The last offset of the patch exceeds child_block.end.
    In this case, remove the overlapping part of the patch.

    ~~~~~~ before ~~~~~~
    @parent_block (# = already downloaded)
    first     last                          end (before split)
      |########------------------------------|

    @child_block
              first = last = 0          end = parent_end
      ---------------------------------------|
              continuation : succeeds from patch

    @fetched patch
                          |-------- patch --------|
                                    |
                                    V
    ~~~~~~ after ~~~~~~
    @parent_block
    first     last       end (after split)
      |########------------|

    @child_block            old patch.end
              first  last = end |
                  |#############| ...... cut the extra data.
                       ^
              continuation : None (download complete)

    @fetched patch
                          |-------- patch --------|
    """
    parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
    child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
    patch = Patch(chats=load_chatdata('pt0-5.json'),
                  first=32500, last=34000, continuation='patch')

    split(parent, child, patch)

    assert child.continuation is None
    assert parent.last < child.first
    assert parent.end == child.first
    assert child.first < child.last
    assert child.last < child.end
    assert parent.during_split == False
    assert child.during_split == False

def test_split_none():
    """patch.last <= parent_block.last

    While awaiting run() -> asyncdl._fetch(),
    downloading of the parent_block proceeds,
    and parent_block.last exceeds even patch.last.
    In this case, the fetched patch is discarded entirely,
    and the worker searches for another block to process.

    ~~~~~~ before ~~~~~~
                        patch.first
    first                  |                 last    end
      |####################|###################|-------|

    @child_block
              first = last = 0               end = parent_end
      ---------------------------------------------------|

    @fetched patch
                          |-- patch --|
              patch.last < parent_block.last
                                |
                                V
    ~~~~~~ after ~~~~~~
    @parent_block
    first                                  last    end
      |########################################|-------|

    @child_block
      ............  -> discard all data.
    """
    parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
    child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
    patch = Patch(chats=load_chatdata('pt0-5.json'),
                  first=32500, last=34000, continuation='patch')

    split(parent, child, patch)

    assert parent.last == 40000   # no change
    assert parent.end == 60000    # no change
    assert child.continuation is None
    assert parent.during_split == False
    assert child.during_split == True   # excluded from the split target search

12 file diffs suppressed because they are too large.

tests/testdata/dl_patch/pt0-0.json (vendored, new file, 3078 lines): diff suppressed because it is too large
tests/testdata/dl_patch/pt0-1.json (vendored, new file, 3078 lines): diff suppressed because it is too large
tests/testdata/dl_patch/pt0-3.json (vendored, new file, 3078 lines): diff suppressed because it is too large
tests/testdata/dl_patch/pt0-4.json (vendored, new file, 3078 lines): diff suppressed because it is too large
tests/testdata/dl_patch/pt0-5.json (vendored, new file, 3078 lines): diff suppressed because it is too large