# pytchat-fork/tests/test_extract_duplcheck.py
import json

from pytchat.tool.extract import duplcheck
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.duplcheck import _dump

def _open_file(path):
    with open(path, mode='r', encoding='utf-8') as f:
        return f.read()

def test_overlap():
    """
    Test removal of overlapping chat data.

    operation: [0] [2] [3] [4] -> align `last` to `end`
               [1] , [5]       -> no change
    """
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/extract_duplcheck/overlap/" + filename))
        )[1]

    blocks = (
        Block(first=0,     last=12771, end=9890,  chat_data=load_chatdata("dp0-0.json")),
        Block(first=9890,  last=15800, end=20244, chat_data=load_chatdata("dp0-1.json")),
        Block(first=20244, last=45146, end=32476, chat_data=load_chatdata("dp0-2.json")),
        Block(first=32476, last=50520, end=41380, chat_data=load_chatdata("dp0-3.json")),
        Block(first=41380, last=62875, end=52568, chat_data=load_chatdata("dp0-4.json")),
        Block(first=52568, last=62875, end=54000, chat_data=load_chatdata("dp0-5.json"), is_last=True)
    )
result = duplcheck.remove_overlap(blocks)
    # dp0-0.json contains an item whose offset time is 9890 (equal to
    # block[0].end and block[1].first), so block[0].last must be aligned
    # to the closest smaller offset: 9779.
assert result[0].last == 9779
assert result[1].last == 15800
assert result[2].last == 32196
assert result[3].last == 41116
assert result[4].last == 52384
    # The last block is always included in the result unchanged.
assert result[5].last == 62875
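

# Illustrative sketch only (an assumption about the trimming rule, not the
# actual duplcheck implementation): remove_overlap appears to cut each
# non-last block back so that its final chat item sits below the block's
# `end`, then sets `last` to that item's offset. The helper name below is
# hypothetical.
def _align_last_below_end(offsets, end):
    """Return the greatest offset strictly smaller than `end`,
    assuming `offsets` is sorted in ascending order."""
    candidates = [offset for offset in offsets if offset < end]
    return candidates[-1] if candidates else None


# Example: _align_last_below_end([9500, 9779, 9890, 12771], 9890) returns 9779,
# matching the expected result[0].last in test_overlap above.
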
def test_duplicate_head():
    """
    Test removal of blocks with duplicate head data.

    operation: [0] , [1] -> discard [0]
               [1] , [2] -> discard [1]
               [2] , [3] -> append [2]
               [3] , [4] -> discard [3]
               [4] , [5] -> append [4]
                            append [5]
    result:    [2] , [4] , [5]
    """
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/extract_duplcheck/head/" + filename))
        )[1]

    # Chat data offsets are ignored in this test.
    blocks = (
        Block(first=0,     last=2500,  chat_data=load_chatdata("dp0-0.json")),
        Block(first=0,     last=38771, chat_data=load_chatdata("dp0-1.json")),
        Block(first=0,     last=45146, chat_data=load_chatdata("dp0-2.json")),
        Block(first=20244, last=60520, chat_data=load_chatdata("dp0-3.json")),
        Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
        Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
    )
_dump(blocks)
result = duplcheck.remove_duplicate_head(blocks)
assert len(result) == 3
assert result[0].first == blocks[2].first
assert result[0].last == blocks[2].last
assert result[1].first == blocks[4].first
assert result[1].last == blocks[4].last
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
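

# Illustrative sketch only (an assumption about the selection rule, not the
# actual duplcheck implementation): remove_duplicate_head appears to keep,
# for blocks that start from the same `first`, only the block that extends
# furthest. The helper name below is hypothetical.
def _drop_duplicate_heads(pairs):
    """Given (first, last) pairs sorted by `first`, keep the widest pair
    per distinct `first` value."""
    widest = {}
    for first, last in pairs:
        if first not in widest or last > widest[first]:
            widest[first] = last
    return sorted(widest.items())


# Example:
# _drop_duplicate_heads([(0, 2500), (0, 38771), (0, 45146),
#                        (20244, 60520), (20244, 62875), (52568, 62875)])
# -> [(0, 45146), (20244, 62875), (52568, 62875)], i.e. blocks [2], [4], [5]
# as expected in test_duplicate_head above.
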
def test_duplicate_tail():
    """
    Test removal of blocks with duplicate tail data.

    operation: append [0]
               [0] , [1] -> discard [1]
               [1] , [2] -> append [2]
               [2] , [3] -> discard [3]
               [3] , [4] -> append [4]
               [4] , [5] -> discard [5]
    result:    [0] , [2] , [4]
    """
    def load_chatdata(filename):
        return parser.parse(
            json.loads(_open_file("tests/testdata/extract_duplcheck/head/" + filename))
        )[1]

    # Chat data offsets are ignored in this test.
    blocks = (
        Block(first=0,     last=2500,  chat_data=load_chatdata("dp0-0.json")),
        Block(first=1500,  last=2500,  chat_data=load_chatdata("dp0-1.json")),
        Block(first=10000, last=45146, chat_data=load_chatdata("dp0-2.json")),
        Block(first=20244, last=45146, chat_data=load_chatdata("dp0-3.json")),
        Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
        Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
    )
result = duplcheck.remove_duplicate_tail(blocks)
_dump(result)
assert len(result) == 3
assert result[0].first == blocks[0].first
assert result[0].last == blocks[0].last
assert result[1].first == blocks[2].first
assert result[1].last == blocks[2].last
assert result[2].first == blocks[4].first
assert result[2].last == blocks[4].last
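

# Illustrative sketch only (an assumption about the selection rule, not the
# actual duplcheck implementation): remove_duplicate_tail appears to keep,
# for blocks that end at the same `last`, only the block that starts
# earliest. The helper name below is hypothetical.
def _drop_duplicate_tails(pairs):
    """Given (first, last) pairs, keep the widest pair per distinct `last`
    value (the one with the smallest `first`)."""
    widest = {}
    for first, last in pairs:
        if last not in widest or first < widest[last]:
            widest[last] = first
    return sorted((first, last) for last, first in widest.items())


# Example:
# _drop_duplicate_tails([(0, 2500), (1500, 2500), (10000, 45146),
#                        (20244, 45146), (20244, 62875), (52568, 62875)])
# -> [(0, 2500), (10000, 45146), (20244, 62875)], i.e. blocks [0], [2], [4]
# as expected in test_duplicate_tail above.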