Files
pytchat-fork/tests/test_extract_patch.py
2020-02-26 23:47:33 +09:00

239 lines
7.3 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import aiohttp
import asyncio
import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool.extract import duplcheck
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.patch import Patch, fill, split, set_patch
from pytchat.tool.extract.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
)[1]
def test_split_0():
"""
Normal case
~~~~~~ before ~~~~~~
@parent_block (# = already fetched)
first last end
|########----------------------------------------|
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (after split)
|########------------|
@child_block
first last end
|###########---------------|
@fetched patch
|-- patch --|
"""
parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert child.continuation == 'patch'
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert parent.during_split == False
assert child.during_split == False
def test_split_1():
"""patch.first <= parent_block.last
While awaiting at run()->asyncdl._fetch()
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and worker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|#####|---------------------|
^
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end
|###########################|--------------------|
@child_block
.............. ->  discard all data
"""
parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert parent.last == 33000 #no change
assert parent.end == 60000 #no change
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == True #exclude during_split sequence
def test_split_2():
"""child_block.end < patch.last:
Case the last offset of patch exceeds child_block.end.
In this case, remove overlapped data of patch.
~~~~~~ before ~~~~~~
@parent_block (# = already fetched)
first last end (before split)
|########------------------------------|
@child_block
first = last = 0 end (=parent_end)
| |
continuation:succeed from patch
@fetched patch
|-------- patch --------|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (after split)
|########------------|
@child_block old patch.end
first last=end |
|#################|...... cut extra data.
^
continuation : None (extract complete)
@fetched patch
|-------- patch --------|
"""
parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert child.continuation is None
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == False
def test_split_none():
"""patch.last <= parent_block.last
While awaiting at run()->asyncdl._fetch()
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and worker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|###################|-------|
^
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
patch.last < parent_block.last .
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (before split)
|########################################|-------|
@child_block
............ -> discard all data.
"""
parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent,child,patch)
assert parent.last == 40000 #no change
assert parent.end == 60000 #no change
assert child.continuation is None
assert parent.during_split == False
assert child.during_split == True #exclude during_split sequence