Aggregate return values with patch class

This commit is contained in:
taizan-hokuto
2020-02-16 20:43:12 +09:00
parent 6fdb3bf8cf
commit c4cf424702
23 changed files with 35254 additions and 327 deletions

View File

@@ -1,19 +1,19 @@
import aiohttp
import asyncio
import json
from pytchat.tool import parser
from pytchat.tool.download import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
from pytchat.tool import asyncdl
from pytchat.tool.download import asyncdl
def _open_file(path):
    """Return the entire content of the UTF-8 text file at ``path``.

    The file is opened read-only and closed again before returning.
    """
    with open(path, mode='r', encoding='utf-8') as f:
        return f.read()
def test_asyncdl_split(mocker):
def test_asyncdl_split():
ret = asyncdl._split(0,1000,1)
assert ret == [0]

View File

@@ -3,60 +3,73 @@ import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool import duplcheck
from pytchat.tool import parser
from pytchat.tool.block import Block
from pytchat.tool.duplcheck import _dump
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.duplcheck import _dump
def _open_file(path):
    """Return the entire content of the UTF-8 text file at ``path``.

    The file is opened read-only and closed again before returning.
    """
    with open(path, mode='r', encoding='utf-8') as f:
        return f.read()
def load_chatdata(filename):
def test_overlap():
"""
test overlap data
operation : [0] [2] [3] [4] -> last :align to end
[1] , [5] -> no change
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
)[1]
blocks = (
Block(first = 0, last= 12771, end= 9890,chat_data = load_chatdata("dp0-0.json")),
Block(first = 9890, last= 15800, end= 20244,chat_data = load_chatdata("dp0-1.json")),
Block(first = 20244,last= 45146, end= 32476,chat_data = load_chatdata("dp0-2.json")),
Block(first = 32476,last= 50520, end= 41380,chat_data = load_chatdata("dp0-3.json")),
Block(first = 41380,last= 62875, end= 52568,chat_data = load_chatdata("dp0-4.json")),
Block(first = 52568,last= 62875, end= 54000,chat_data = load_chatdata("dp0-5.json"),is_last=True)
)
result = duplcheck.remove_overlap(blocks)
#dp0-0.json has an item whose offset time is 9890 (equal to block[0].end = block[1].first),
#but last must be aligned to the closest smaller value: 9779.
assert result[0].last == 9779
assert result[1].last == 15800
assert result[2].last == 32196
assert result[3].last == 41116
assert result[4].last == 52384
#the last block must always be added to the result.
assert result[5].last == 62875
def test_duplicate_head():
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
)[1]
def test_overwrap(mocker):
"""
test overwrap data
operation : [0] , [1] -> discard [1]
[0] , [2] , [3] -> discard [2]
[3] , [4] , [5] -> discard [4]
result : [0] , [3] , [5]
"""
blocks = (
Block(first = 0,last= 38771, chat_data = load_chatdata("dp0-0.json")),
Block(first = 9890,last= 38771, chat_data = load_chatdata("dp0-1.json")),
Block(first = 20244,last= 45146, chat_data = load_chatdata("dp0-2.json")),
Block(first = 32476,last= 60520, chat_data = load_chatdata("dp0-3.json")),
Block(first = 41380,last= 62875, chat_data = load_chatdata("dp0-4.json")),
Block(first = 52568,last= 62875, chat_data = load_chatdata("dp0-5.json"))
)
result = duplcheck.overwrap(blocks)
assert len(result) == 3
assert result[0].first == blocks[0].first
assert result[0].last == blocks[0].last
assert result[1].first == blocks[3].first
assert result[1].last == blocks[3].last
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
def test_duplicate_head(mocker):
"""
test duplicate head data
operation : [0] , [1] -> discard [0]
[1] , [2] -> discard [1]
[2] , [3] -> append [2]
[2] , [3] -> append [2]
[3] , [4] -> discard [3]
[4] , [5] -> append [4]
[4] , [5] -> append [4]
append [5]
result : [2] , [4] , [5]
"""
#chat data offsets are ignored.
blocks = (
Block(first = 0, last = 2500, chat_data = load_chatdata("dp0-0.json")),
Block(first = 0, last =38771, chat_data = load_chatdata("dp0-1.json")),
@@ -66,7 +79,7 @@ def test_duplicate_head(mocker):
Block(first = 52568, last =62875, chat_data = load_chatdata("dp0-5.json"))
)
_dump(blocks)
result = duplcheck.duplicate_head(blocks)
result = duplcheck.remove_duplicate_head(blocks)
assert len(result) == 3
assert result[0].first == blocks[2].first
@@ -76,19 +89,23 @@ def test_duplicate_head(mocker):
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
def test_duplicate_tail(mocker):
def test_duplicate_tail():
"""
test duplicate tail data
operation : append [0]
[0] , [1] -> discard [1]
[1] , [2] -> append [2]
[1] , [2] -> append [2]
[2] , [3] -> discard [3]
[3] , [4] -> append [4]
[3] , [4] -> append [4]
[4] , [5] -> discard [5]
result : [0] , [2] , [4]
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
)[1]
#chat data offsets are ignored.
blocks = (
Block(first = 0,last = 2500, chat_data=load_chatdata("dp0-0.json")),
Block(first = 1500,last = 2500, chat_data=load_chatdata("dp0-1.json")),
@@ -98,7 +115,7 @@ def test_duplicate_tail(mocker):
Block(first = 52568,last = 62875, chat_data=load_chatdata("dp0-5.json"))
)
result = duplcheck.duplicate_tail(blocks)
result = duplcheck.remove_duplicate_tail(blocks)
_dump(result)
assert len(result) == 3
assert result[0].first == blocks[0].first

232
tests/test_patch.py Normal file
View File

@@ -0,0 +1,232 @@
import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.patch import Patch, fill, split, set_patch
from pytchat.tool.download.duplcheck import _dump
def _open_file(path):
    """Return the entire content of the UTF-8 text file at ``path``.

    The file is opened read-only and closed again before returning.
    """
    with open(path, mode='r', encoding='utf-8') as f:
        return f.read()
def load_chatdata(filename):
    """Load a patch test fixture and return its parsed chat data.

    ``filename`` is appended to the ``tests/testdata/dl_patch/`` directory.
    The file is decoded as JSON and handed to ``parser.parse``; the element
    at index 1 of the parser's result is returned.
    """
    raw_text = _open_file("tests/testdata/dl_patch/"+filename)
    parsed = parser.parse(json.loads(raw_text))
    return parsed[1]
def test_split_0():
    """
    Normal case: the fetched patch lies inside the part of the parent
    block that has not been downloaded yet.

    Before split() (# = already downloaded)
      parent : first=0, last=4000, end=60000
      child  : first=last=0, end=60000 (same end as the parent)
      patch  : first=32500, last=34000

        parent |####----------------------------------------|
        patch                      |-- patch --|

    Expected after split()
      - the parent block ends exactly where the child block begins
      - the child block carries the continuation of the patch and still
        has a remainder to download (first < last < end)
      - neither block is flagged as during_split any more
    """
    parent_block = Block(first=0, last=4000, end=60000,
                         continuation='parent', during_split=True)
    child_block = Block(first=0, last=0, end=60000,
                        continuation='mean', during_split=True)
    fetched = Patch(chats=load_chatdata('pt0-5.json'),
                    first=32500, last=34000, continuation='patch')

    split(parent_block, child_block, fetched)

    assert child_block.continuation == 'patch'
    assert parent_block.last < child_block.first
    assert parent_block.end == child_block.first
    assert child_block.first < child_block.last < child_block.end
    # (parent, child): the split sequence is finished for both blocks.
    assert (parent_block.during_split, child_block.during_split) == (False, False)
def test_split_1():
    """
    patch.first <= parent_block.last

    While the worker was awaiting the fetch (run() -> asyncdl._fetch()),
    downloading of the parent block went on and parent.last moved past
    patch.first. The fetched patch is then discarded entirely and the
    worker has to look for another block to process.

    Before split() (# = already downloaded)
      parent : first=0, last=33000, end=60000
      child  : first=last=0, end=60000 (same end as the parent)
      patch  : first=32500, last=34000  (patch.first < parent.last)

    Expected after split()
      - parent.last and parent.end are unchanged
      - the child block has no continuation (all patch data discarded)
      - the parent leaves the during_split state, the child keeps it
    """
    parent_block = Block(first=0, last=33000, end=60000,
                         continuation='parent', during_split=True)
    child_block = Block(first=0, last=0, end=60000,
                        continuation='mean', during_split=True)
    fetched = Patch(chats=load_chatdata('pt0-5.json'),
                    first=32500, last=34000, continuation='patch')

    split(parent_block, child_block, fetched)

    # The parent block must be left untouched.
    assert parent_block.last == 33000
    assert parent_block.end == 60000
    assert child_block.continuation is None
    # (parent, child): the child stays flagged so that it is excluded
    # from the during_split sequence.
    assert (parent_block.during_split, child_block.during_split) == (False, True)
def test_split_2():
    """
    child_block.end < patch.last

    The last offset of the fetched patch exceeds the end of the child
    block, so the overlapping tail of the patch has to be cut off.

    Before split() (# = already downloaded)
      parent : first=0, last=4000, end=33500
      child  : first=last=0, end=33500 (same end as the parent)
      patch  : first=32500, last=34000  (patch.last > child.end)

        parent |####--------------------------|
        patch                          |---- patch ----|

    Expected after split()
      - the parent block ends exactly where the child block begins
      - the child block has no continuation (download complete)
      - neither block is flagged as during_split any more

    NOTE(review): the original diagram labelled the child block
    "last=end", but the assertion below requires child.last < child.end
    - confirm which one is intended.
    """
    parent = Block(first=0, last=4000, end=33500,
                   continuation='parent', during_split=True)
    child = Block(first=0, last=0, end=33500,
                  continuation='mean', during_split=True)
    patch = Patch(chats=load_chatdata('pt0-5.json'),
                  first=32500, last=34000, continuation='patch')

    split(parent, child, patch)

    # Checked once; the original repeated this assertion further down.
    assert child.continuation is None
    assert parent.last < child.first
    assert parent.end == child.first
    assert child.first < child.last
    assert child.last < child.end
    assert parent.during_split == False
    assert child.during_split == False
def test_split_none():
    """
    patch.last <= parent_block.last

    While the worker was awaiting the fetch (run() -> asyncdl._fetch()),
    downloading of the parent block went on and parent.last moved past
    the whole patch. The fetched patch is then discarded entirely and
    the worker has to look for another block to process.

    Before split() (# = already downloaded)
      parent : first=0, last=40000, end=60000
      child  : first=last=0, end=60000 (same end as the parent)
      patch  : first=32500, last=34000  (patch.last < parent.last)

    Expected after split()
      - parent.last and parent.end are unchanged
      - the child block has no continuation (all patch data discarded)
      - the parent leaves the during_split state, the child keeps it
    """
    parent_block = Block(first=0, last=40000, end=60000,
                         continuation='parent', during_split=True)
    child_block = Block(first=0, last=0, end=60000,
                        continuation='mean', during_split=True)
    fetched = Patch(chats=load_chatdata('pt0-5.json'),
                    first=32500, last=34000, continuation='patch')

    split(parent_block, child_block, fetched)

    # The parent block must be left untouched.
    assert parent_block.last == 40000
    assert parent_block.end == 60000
    assert child_block.continuation is None
    # (parent, child): the child stays flagged so that it is excluded
    # from the during_split sequence.
    assert (parent_block.during_split, child_block.during_split) == (False, True)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3078
tests/testdata/dl_patch/pt0-0.json vendored Normal file

File diff suppressed because it is too large Load Diff

3078
tests/testdata/dl_patch/pt0-1.json vendored Normal file

File diff suppressed because it is too large Load Diff

3078
tests/testdata/dl_patch/pt0-3.json vendored Normal file

File diff suppressed because it is too large Load Diff

3078
tests/testdata/dl_patch/pt0-4.json vendored Normal file

File diff suppressed because it is too large Load Diff

3078
tests/testdata/dl_patch/pt0-5.json vendored Normal file

File diff suppressed because it is too large Load Diff