Remove files

This commit is contained in:
taizan-hokouto
2020-12-05 20:59:47 +09:00
parent eca766ff1a
commit 35a27fee3e
21 changed files with 1 additions and 1706 deletions

View File

@@ -1,134 +0,0 @@
import json
from pytchat.tool.extract import duplcheck
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.duplcheck import _dump
def _open_file(path):
with open(path, mode='r', encoding='utf-8') as f:
return f.read()
def test_overlap():
"""
test overlap data
operation : [0] [2] [3] [4] -> last :align to end
[1] , [5] -> no change
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file(
"tests/testdata/extract_duplcheck/overlap/" + filename))
)[1]
blocks = (
Block(first=0, last=12771, end=9890,
chat_data=load_chatdata("dp0-0.json")),
Block(first=9890, last=15800, end=20244,
chat_data=load_chatdata("dp0-1.json")),
Block(first=20244, last=45146, end=32476,
chat_data=load_chatdata("dp0-2.json")),
Block(first=32476, last=50520, end=41380,
chat_data=load_chatdata("dp0-3.json")),
Block(first=41380, last=62875, end=52568,
chat_data=load_chatdata("dp0-4.json")),
Block(first=52568, last=62875, end=54000,
chat_data=load_chatdata("dp0-5.json"), is_last=True)
)
result = duplcheck.remove_overlap(blocks)
# dp0-0.json has item offset time is 9890 (equals block[0].end = block[1].first),
# but must be aligne to the most close and smaller value:9779.
assert result[0].last == 9779
assert result[1].last == 15800
assert result[2].last == 32196
assert result[3].last == 41116
assert result[4].last == 52384
# the last block must be always added to result.
assert result[5].last == 62875
def test_duplicate_head():
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file(
"tests/testdata/extract_duplcheck/head/" + filename))
)[1]
"""
test duplicate head data
operation : [0] , [1] -> discard [0]
[1] , [2] -> discard [1]
[2] , [3] -> append [2]
[3] , [4] -> discard [3]
[4] , [5] -> append [4]
append [5]
result : [2] , [4] , [5]
"""
# chat data offsets are ignored.
blocks = (
Block(first=0, last=2500, chat_data=load_chatdata("dp0-0.json")),
Block(first=0, last=38771, chat_data=load_chatdata("dp0-1.json")),
Block(first=0, last=45146, chat_data=load_chatdata("dp0-2.json")),
Block(first=20244, last=60520, chat_data=load_chatdata("dp0-3.json")),
Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
)
_dump(blocks)
result = duplcheck.remove_duplicate_head(blocks)
assert len(result) == 3
assert result[0].first == blocks[2].first
assert result[0].last == blocks[2].last
assert result[1].first == blocks[4].first
assert result[1].last == blocks[4].last
assert result[2].first == blocks[5].first
assert result[2].last == blocks[5].last
def test_duplicate_tail():
"""
test duplicate tail data
operation : append [0]
[0] , [1] -> discard [1]
[1] , [2] -> append [2]
[2] , [3] -> discard [3]
[3] , [4] -> append [4]
[4] , [5] -> discard [5]
result : [0] , [2] , [4]
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file(
"tests/testdata/extract_duplcheck/head/" + filename))
)[1]
# chat data offsets are ignored.
blocks = (
Block(first=0, last=2500, chat_data=load_chatdata("dp0-0.json")),
Block(first=1500, last=2500, chat_data=load_chatdata("dp0-1.json")),
Block(first=10000, last=45146, chat_data=load_chatdata("dp0-2.json")),
Block(first=20244, last=45146, chat_data=load_chatdata("dp0-3.json")),
Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
)
result = duplcheck.remove_duplicate_tail(blocks)
_dump(result)
assert len(result) == 3
assert result[0].first == blocks[0].first
assert result[0].last == blocks[0].last
assert result[1].first == blocks[2].first
assert result[1].last == blocks[2].last
assert result[2].first == blocks[4].first
assert result[2].last == blocks[4].last

View File

@@ -1,239 +0,0 @@
import json
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.patch import Patch, split
def _open_file(path):
with open(path, mode='r', encoding='utf-8') as f:
return f.read()
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/fetch_patch/" + filename))
)[1]
def test_split_0():
"""
Normal case
~~~~~~ before ~~~~~~
@parent_block (# = already fetched)
first last end
|########----------------------------------------|
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (after split)
|########------------|
@child_block
first last end
|###########---------------|
@fetched patch
|-- patch --|
"""
parent = Block(first=0, last=4000, end=60000,
continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000,
continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent, child, patch)
assert child.continuation == 'patch'
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert parent.during_split is False
assert child.during_split is False
def test_split_1():
"""patch.first <= parent_block.last
While awaiting at run()->asyncdl._fetch()
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and worker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|#####|---------------------|
^
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end
|###########################|--------------------|
@child_block
.............. ->  discard all data
"""
parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent, child, patch)
assert parent.last == 33000 # no change
assert parent.end == 60000 # no change
assert child.continuation is None
assert parent.during_split is False
assert child.during_split is True # exclude during_split sequence
def test_split_2():
"""child_block.end < patch.last:
Case the last offset of patch exceeds child_block.end.
In this case, remove overlapped data of patch.
~~~~~~ before ~~~~~~
@parent_block (# = already fetched)
first last end (before split)
|########------------------------------|
@child_block
first = last = 0 end (=parent_end)
| |
continuation:succeed from patch
@fetched patch
|-------- patch --------|
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (after split)
|########------------|
@child_block old patch.end
first last=end |
|#################|...... cut extra data.
^
continuation : None (extract complete)
@fetched patch
|-------- patch --------|
"""
parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent, child, patch)
assert child.continuation is None
assert parent.last < child.first
assert parent.end == child.first
assert child.first < child.last
assert child.last < child.end
assert child.continuation is None
assert parent.during_split is False
assert child.during_split is False
def test_split_none():
"""patch.last <= parent_block.last
While awaiting at run()->asyncdl._fetch()
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and worker searches other processing block again.
~~~~~~ before ~~~~~~
patch.first
first | last end
|####################|###################|-------|
^
@child_block
first = last = 0 end (=parent_end)
| |
@fetched patch
|-- patch --|
patch.last < parent_block.last.
|
|
V
~~~~~~ after ~~~~~~
@parent_block
first last end (before split)
|########################################|-------|
@child_block
............ -> discard all data.
"""
parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
patch = Patch(chats=load_chatdata('pt0-5.json'),
first=32500, last=34000, continuation='patch')
split(parent, child, patch)
assert parent.last == 40000 # no change
assert parent.end == 60000 # no change
assert child.continuation is None
assert parent.during_split is False
assert child.during_split is True # exclude during_split sequence

View File

@@ -1,101 +0,0 @@
from json.decoder import JSONDecodeError
from pytchat.tool.videoinfo import VideoInfo
from pytchat.exceptions import InvalidVideoIdException
def _open_file(path):
with open(path, mode='r', encoding='utf-8') as f:
return f.read()
def _set_test_data(filepath, mocker):
_text = _open_file(filepath)
response_mock = mocker.Mock()
response_mock.status_code = 200
response_mock.text = _text
mocker.patch('httpx.Client.get').return_value = response_mock
def test_archived_page(mocker):
_set_test_data('tests/testdata/videoinfo/archived_page.txt', mocker)
info = VideoInfo('__test_id__')
actual_thumbnail_url = 'https://i.ytimg.com/vi/fzI9FNjXQ0o/hqdefault.jpg'
assert info.video_id == '__test_id__'
assert info.get_channel_name() == 'GitHub'
assert info.get_thumbnail() == actual_thumbnail_url
assert info.get_title() == 'GitHub Arctic Code Vault'
assert info.get_channel_id() == 'UC7c3Kb6jYCRj4JOHHZTxKsQ'
assert info.get_duration() == 148
def test_live_page(mocker):
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
info = VideoInfo('__test_id__')
'''live page: duration==0'''
assert info.get_duration() == 0
assert info.video_id == '__test_id__'
assert info.get_channel_name() == 'BGM channel'
assert info.get_thumbnail() == \
'https://i.ytimg.com/vi/fEvM-OUbaKs/hqdefault_live.jpg'
assert info.get_title() == (
'Coffee Jazz Music - Chill Out Lounge Jazz Music Radio'
' - 24/7 Live Stream - Slow Jazz')
assert info.get_channel_id() == 'UCQINXHZqCU5i06HzxRkujfg'
def test_invalid_video_id(mocker):
'''Test case invalid video_id is specified.'''
_set_test_data(
'tests/testdata/videoinfo/invalid_video_id_page.txt', mocker)
try:
_ = VideoInfo('__test_id__')
assert False
except InvalidVideoIdException:
assert True
def test_no_info(mocker):
'''Test case the video page has renderer, but no info.'''
_set_test_data(
'tests/testdata/videoinfo/no_info_page.txt', mocker)
info = VideoInfo('__test_id__')
assert info.video_id == '__test_id__'
assert info.get_channel_name() is None
assert info.get_thumbnail() is None
assert info.get_title() is None
assert info.get_channel_id() is None
assert info.get_duration() is None
def test_collapsed_data(mocker):
'''Test case the video page's info is collapsed.'''
_set_test_data(
'tests/testdata/videoinfo/collapsed_page.txt', mocker)
try:
_ = VideoInfo('__test_id__')
assert False
except JSONDecodeError:
assert True
def test_pattern_unmatch(mocker):
'''Test case the pattern for extraction is unmatched.'''
_set_test_data(
'tests/testdata/videoinfo/pattern_unmatch.txt', mocker)
try:
_ = VideoInfo('__test_id__')
assert False
except JSONDecodeError:
assert True
def test_extradata_handling(mocker):
'''Test case the extracted data are JSON lines.'''
_set_test_data(
'tests/testdata/videoinfo/extradata_page.txt', mocker)
try:
_ = VideoInfo('__test_id__')
assert True
except JSONDecodeError as e:
print(e.doc)
assert False