240 lines
7.1 KiB
Python
240 lines
7.1 KiB
Python
import json
|
||
|
||
from pytchat.tool.extract import parser
|
||
from pytchat.tool.extract.block import Block
|
||
from pytchat.tool.extract.patch import Patch, split
|
||
|
||
|
||
def _open_file(path):
    """Return the entire contents of the text file at *path*.

    The file is opened for reading and decoded as UTF-8; it is closed again
    before the text is returned.
    """
    # Read mode is open()'s default, so only the encoding has to be pinned.
    with open(path, encoding='utf-8') as handle:
        contents = handle.read()
    return contents
|
||
|
||
|
||
def load_chatdata(filename):
    """Load a fixture from ``tests/testdata/fetch_patch/`` and parse it.

    *filename* is appended to the fixture directory, the file is read as
    text, decoded as JSON and handed to ``parser.parse``.  Only the element
    at index 1 of the parser's result is returned; the tests in this module
    pass that value to ``Patch(chats=...)``.
    """
    fixture_text = _open_file("tests/testdata/fetch_patch/" + filename)
    parsed = parser.parse(json.loads(fixture_text))
    return parsed[1]
|
||
|
||
|
||
def test_split_0():
    """Normal case: the patch starts after the fetched part of the parent.

    ~~~~~~ before ~~~~~~

    @parent_block (# = already fetched)

     first    last                                    end
      |########----------------------------------------|

    @child_block

     first = last = 0                                 end (=parent_end)
      |                                                |

    @fetched patch
                          |-- patch --|

                          |
                          |
                          V

    ~~~~~~ after ~~~~~~

    @parent_block

     first    last       end (after split)
      |########------------|

    @child_block
                         first   last                  end
                          |###########---------------|

    @fetched patch
                          |-- patch --|
    """
    parent = Block(
        first=0,
        last=4000,
        end=60000,
        continuation='parent',
        during_split=True,
    )
    child = Block(
        first=0,
        last=0,
        end=60000,
        continuation='mean',
        during_split=True,
    )
    patch = Patch(
        chats=load_chatdata('pt0-5.json'),
        first=32500,
        last=34000,
        continuation='patch',
    )

    split(parent, child, patch)

    # The child is expected to carry on with the patch's continuation.
    assert child.continuation == 'patch'

    # Expected layout: parent.last < parent.end == child.first
    #                  < child.last < child.end
    assert parent.last < child.first
    assert parent.end == child.first
    assert child.first < child.last
    assert child.last < child.end

    # Both blocks are expected to have left the "during split" state.
    assert parent.during_split is False
    assert child.during_split is False
|
||
|
||
|
||
def test_split_1():
    """patch.first <= parent_block.last

    While awaiting at run()->asyncdl._fetch(), fetching of parent_block
    proceeds and parent_block.last exceeds patch.first.

    In this case the fetched patch is discarded entirely and the worker
    searches for another block to process.

    ~~~~~~ before ~~~~~~

                        patch.first
     first                |   last                    end
      |####################|#####|---------------------|
                           ^
    @child_block
     first = last = 0                                 end (=parent_end)
      |                                                |

    @fetched patch
                           |-- patch --|

                           |
                           |
                           V

    ~~~~~~ after ~~~~~~

    @parent_block
     first                      last                  end
      |###########################|--------------------|

    @child_block

                           .............. -> discard all data
    """
    parent = Block(
        first=0,
        last=33000,
        end=60000,
        continuation='parent',
        during_split=True,
    )
    child = Block(
        first=0,
        last=0,
        end=60000,
        continuation='mean',
        during_split=True,
    )
    patch = Patch(
        chats=load_chatdata('pt0-5.json'),
        first=32500,
        last=34000,
        continuation='patch',
    )

    split(parent, child, patch)

    # The parent's range must be left exactly as it was.
    assert parent.last == 33000  # no change
    assert parent.end == 60000  # no change

    # The discarded child has no continuation to follow.
    assert child.continuation is None

    assert parent.during_split is False
    # The child keeps the flag so that it is excluded from the
    # during_split sequence.
    assert child.during_split is True
|
||
|
||
|
||
def test_split_2():
    """child_block.end < patch.last

    The last offset of the patch exceeds child_block.end.
    In this case the overlapping data of the patch is removed.

    ~~~~~~ before ~~~~~~

    @parent_block (# = already fetched)
     first    last                          end (before split)
      |########------------------------------|

    @child_block
     first = last = 0                       end (=parent_end)
      |                                      |

                                   continuation: succeeded from patch

    @fetched patch
                          |-------- patch --------|

                          |
                          |
                          V

    ~~~~~~ after ~~~~~~

    @parent_block
     first    last       end (after split)
      |########------------|

    @child_block                             old patch.end
                         first         last=end  |
                          |#################|...... cut extra data.
                                            ^
                                continuation : None (extract complete)

    @fetched patch
                          |-------- patch --------|
    """
    parent = Block(
        first=0,
        last=4000,
        end=33500,
        continuation='parent',
        during_split=True,
    )
    child = Block(
        first=0,
        last=0,
        end=33500,
        continuation='mean',
        during_split=True,
    )
    patch = Patch(
        chats=load_chatdata('pt0-5.json'),
        first=32500,
        last=34000,
        continuation='patch',
    )

    split(parent, child, patch)

    # The patch ran past the child's end, so nothing is left to continue
    # from.  (The original test asserted this twice; once is enough.)
    assert child.continuation is None

    # Expected layout: parent.last < parent.end == child.first
    #                  < child.last < child.end
    assert parent.last < child.first
    assert parent.end == child.first
    assert child.first < child.last
    # NOTE(review): the diagram above labels the result "last=end", while
    # this check requires child.last to be strictly below child.end --
    # confirm against split() which of the two is intended.
    assert child.last < child.end

    # Both blocks are expected to have left the "during split" state.
    assert parent.during_split is False
    assert child.during_split is False
|
||
|
||
|
||
def test_split_none():
    """patch.last <= parent_block.last

    While awaiting at run()->asyncdl._fetch(), fetching of parent_block
    proceeds and parent_block.last exceeds patch.last, i.e. the whole
    patch lies inside the range the parent has already fetched.

    In this case the fetched patch is discarded entirely and the worker
    searches for another block to process.

    ~~~~~~ before ~~~~~~

                        patch.first
     first                |                   last    end
      |####################|###################|-------|
                           ^
    @child_block
     first = last = 0                                 end (=parent_end)
      |                                                |

    @fetched patch
                           |-- patch --|
                           patch.last < parent_block.last

                           |
                           |
                           V

    ~~~~~~ after ~~~~~~

    @parent_block
     first                                   last     end (unchanged)
      |########################################|-------|

    @child_block

                           ............ -> discard all data.
    """
    parent = Block(
        first=0,
        last=40000,
        end=60000,
        continuation='parent',
        during_split=True,
    )
    child = Block(
        first=0,
        last=0,
        end=60000,
        continuation='mean',
        during_split=True,
    )
    patch = Patch(
        chats=load_chatdata('pt0-5.json'),
        first=32500,
        last=34000,
        continuation='patch',
    )

    split(parent, child, patch)

    # The parent's range must be left exactly as it was.
    assert parent.last == 40000  # no change
    assert parent.end == 60000  # no change

    # The discarded child has no continuation to follow.
    assert child.continuation is None

    assert parent.during_split is False
    # The child keeps the flag so that it is excluded from the
    # during_split sequence.
    assert child.during_split is True
|