Rename modules

This commit is contained in:
taizan-hokuto
2020-02-26 22:08:36 +09:00
parent 61d4e06470
commit de35537be8
34 changed files with 61 additions and 61 deletions

View File

@@ -3,7 +3,7 @@ import asyncio
import json import json
from . import parser from . import parser
from . block import Block from . block import Block
from . dlworker import DownloadWorker from . worker import ExtractWorker
from . patch import Patch from . patch import Patch
from ... import config from ... import config
from ... paramgen import arcparam from ... paramgen import arcparam
@@ -79,11 +79,11 @@ def ready_blocks(video_id, duration, div, callback):
_get_blocks(video_id, duration, div, callback)) _get_blocks(video_id, duration, div, callback))
return blocks return blocks
def download_patch(callback, blocks, video_id): def fetch_patch(callback, blocks, video_id):
async def _allocate_workers(): async def _allocate_workers():
workers = [ workers = [
DownloadWorker( ExtractWorker(
fetch = _fetch, block = block, fetch = _fetch, block = block,
blocks = blocks, video_id = video_id blocks = blocks, video_id = video_id
) )

View File

@@ -16,9 +16,9 @@ class Block:
this value increases as fetching chatdata progresses. this value increases as fetching chatdata progresses.
end : int : end : int :
target videoOffsetTimeMs of last chat data for download, target videoOffsetTimeMs of last chat data for extract,
equals to first videoOffsetTimeMs of next block. equals to first videoOffsetTimeMs of next block.
when download worker reaches this offset, stop downloading. when extract worker reaches this offset, stop fetching.
continuation : str : continuation : str :
continuation param of last chat data. continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
chat_data : list chat_data : list
done : bool : done : bool :
whether this block has been downloaded. whether this block has been fetched.
remaining : int : remaining : int :
remaining data to download. remaining data to extract.
equals end - last. equals end - last.
is_last : bool : is_last : bool :

View File

@@ -8,7 +8,7 @@ from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__) logger = config.logger(__name__)
headers=config.headers headers=config.headers
class Downloader: class Extractor:
def __init__(self, video_id, duration, div, callback): def __init__(self, video_id, duration, div, callback):
if not isinstance(div ,int) or div < 1: if not isinstance(div ,int) or div < 1:
raise ValueError('div must be positive integer.') raise ValueError('div must be positive integer.')
@@ -44,7 +44,7 @@ class Downloader:
return self return self
def _download_blocks(self): def _download_blocks(self):
asyncdl.download_patch(self.callback, self.blocks, self.video_id) asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
return self return self
def _remove_duplicate_tail(self): def _remove_duplicate_tail(self):
@@ -57,7 +57,7 @@ class Downloader:
ret.extend(block.chat_data) ret.extend(block.chat_data)
return ret return ret
def download(self): def extract(self):
return ( return (
self._ready_blocks() self._ready_blocks()
._remove_duplicate_head() ._remove_duplicate_head()
@@ -68,7 +68,7 @@ class Downloader:
._combine() ._combine()
) )
def download(video_id, div = 1, callback = None, processor = None): def extract(video_id, div = 1, callback = None, processor = None):
duration = 0 duration = 0
try: try:
duration = VideoInfo(video_id).get("duration") duration = VideoInfo(video_id).get("duration")
@@ -77,7 +77,7 @@ def download(video_id, div = 1, callback = None, processor = None):
if duration == 0: if duration == 0:
print("video is live.") print("video is live.")
return [] return []
data = Downloader(video_id, duration, div, callback).download() data = Extractor(video_id, duration, div, callback).extract()
if processor is None: if processor is None:
return data return data
return processor.process( return processor.process(

View File

@@ -5,7 +5,7 @@ from typing import NamedTuple
class Patch(NamedTuple): class Patch(NamedTuple):
""" """
Patch represents chunk of chat data Patch represents chunk of chat data
which is fetched by asyncdl.download_patch._fetch(). which is fetched by asyncdl.fetch_patch._fetch().
""" """
chats : list = [] chats : list = []
continuation : str = None continuation : str = None

View File

@@ -3,17 +3,17 @@ from . block import Block
from . patch import Patch, fill, split from . patch import Patch, fill, split
from ... paramgen import arcparam from ... paramgen import arcparam
class DownloadWorker: class ExtractWorker:
""" """
DownloadWorker associates a download session with a block. ExtractWorker associates a download session with a block.
When the dlworker finishes downloading, the block When the worker finishes fetching, the block
being downloaded is splitted and assigned the free dlworker. being fetched is split and assigned to the free worker.
Parameter Parameter
---------- ----------
fetch : func : fetch : func :
download function of asyncdl extract function of asyncdl
block : Block : block : Block :
Block object that includes chat_data Block object that includes chat_data
@@ -40,7 +40,7 @@ class DownloadWorker:
patch = await self.fetch( patch = await self.fetch(
self.block.continuation, session) self.block.continuation, session)
if patch.continuation is None: if patch.continuation is None:
"""TODO : make the dlworker assigned to the last block """TODO : make the worker assigned to the last block
to work more than twice as possible. to work more than twice as possible.
""" """
break break
@@ -50,7 +50,7 @@ class DownloadWorker:
else: else:
fill(self.block, patch) fill(self.block, patch)
if self.block.continuation is None: if self.block.continuation is None:
"""finished downloading this block """ """finished fetching this block """
self.block.done = True self.block.done = True
self.block = _search_new_block(self) self.block = _search_new_block(self)

View File

@@ -4,7 +4,7 @@ import asyncio
import json import json
from . import parser from . import parser
from . block import Block from . block import Block
from . dlworker import DownloadWorker from . worker import ExtractWorker
from . patch import Patch from . patch import Patch
from ... import config from ... import config
from ... paramgen import arcparam_mining as arcparam from ... paramgen import arcparam_mining as arcparam
@@ -84,11 +84,11 @@ def ready_blocks(video_id, duration, div, callback):
_get_blocks(video_id, duration, div, callback)) _get_blocks(video_id, duration, div, callback))
return blocks return blocks
def download_patch(callback, blocks, video_id): def fetch_patch(callback, blocks, video_id):
async def _allocate_workers(): async def _allocate_workers():
workers = [ workers = [
DownloadWorker( ExtractWorker(
fetch = _fetch, block = block, fetch = _fetch, block = block,
blocks = blocks, video_id = video_id blocks = blocks, video_id = video_id
) )

View File

@@ -16,9 +16,9 @@ class Block:
this value increases as fetching chatdata progresses. this value increases as fetching chatdata progresses.
end : int : end : int :
target videoOffsetTimeMs of last chat data for download, target videoOffsetTimeMs of last chat data for extract,
equals to first videoOffsetTimeMs of next block. equals to first videoOffsetTimeMs of next block.
when download worker reaches this offset, stop downloading. when extract worker reaches this offset, stop fetching.
continuation : str : continuation : str :
continuation param of last chat data. continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
chat_data : list chat_data : list
done : bool : done : bool :
whether this block has been downloaded. whether this block has been fetched.
remaining : int : remaining : int :
remaining data to download. remaining data to extract.
equals end - last. equals end - last.
is_last : bool : is_last : bool :

View File

@@ -5,7 +5,7 @@ from typing import NamedTuple
class Patch(NamedTuple): class Patch(NamedTuple):
""" """
Patch represents chunk of chat data Patch represents chunk of chat data
which is fetched by asyncdl.download_patch._fetch(). which is fetched by asyncdl.fetch_patch._fetch().
""" """
chats : list = [] chats : list = []
continuation : str = None continuation : str = None

View File

@@ -6,7 +6,7 @@ from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__) logger = config.logger(__name__)
headers=config.headers headers=config.headers
class Downloader: class SuperChatMiner:
def __init__(self, video_id, duration, div, callback): def __init__(self, video_id, duration, div, callback):
if not isinstance(div ,int) or div < 1: if not isinstance(div ,int) or div < 1:
raise ValueError('div must be positive integer.') raise ValueError('div must be positive integer.')
@@ -34,7 +34,7 @@ class Downloader:
return self return self
def _download_blocks(self): def _download_blocks(self):
asyncdl.download_patch(self.callback, self.blocks, self.video_id) asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
return self return self
def _combine(self): def _combine(self):
@@ -43,7 +43,7 @@ class Downloader:
ret.extend(block.chat_data) ret.extend(block.chat_data)
return ret return ret
def download(self): def extract(self):
return ( return (
self._ready_blocks() self._ready_blocks()
._set_block_end() ._set_block_end()
@@ -51,7 +51,7 @@ class Downloader:
._combine() ._combine()
) )
def download(video_id, div = 1, callback = None, processor = None): def extract(video_id, div = 1, callback = None, processor = None):
duration = 0 duration = 0
try: try:
duration = VideoInfo(video_id).get("duration") duration = VideoInfo(video_id).get("duration")
@@ -60,7 +60,7 @@ def download(video_id, div = 1, callback = None, processor = None):
if duration == 0: if duration == 0:
print("video is live.") print("video is live.")
return [] return []
data = Downloader(video_id, duration, div, callback).download() data = SuperChatMiner(video_id, duration, div, callback).extract()
if processor is None: if processor is None:
return data return data
return processor.process( return processor.process(

View File

@@ -3,17 +3,17 @@ from . block import Block
from . patch import Patch, fill from . patch import Patch, fill
from ... paramgen import arcparam from ... paramgen import arcparam
INTERVAL = 1 INTERVAL = 1
class DownloadWorker: class ExtractWorker:
""" """
DownloadWorker associates a download session with a block. ExtractWorker associates a download session with a block.
When the dlworker finishes downloading, the block When the worker finishes fetching, the block
being downloaded is splitted and assigned the free dlworker. being fetched is split and assigned to the free worker.
Parameter Parameter
---------- ----------
fetch : func : fetch : func :
download function of asyncdl extract function of asyncdl
block : Block : block : Block :
Block object that includes chat_data Block object that includes chat_data

View File

@@ -1,7 +1,7 @@
import requests,json,datetime import requests,json,datetime
from .. import config from .. import config
def download(url): def extract(url):
_session = requests.Session() _session = requests.Session()
html = _session.get(url, headers=config.headers) html = _session.get(url, headers=config.headers)
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S') with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')

View File

@@ -1,12 +1,12 @@
import aiohttp import aiohttp
import asyncio import asyncio
import json import json
from pytchat.tool.download import parser from pytchat.tool.extract import parser
import sys import sys
import time import time
from aioresponses import aioresponses from aioresponses import aioresponses
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from pytchat.tool.download import asyncdl from pytchat.tool.extract import asyncdl
def _open_file(path): def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f: with open(path,mode ='r',encoding = 'utf-8') as f:

View File

@@ -3,10 +3,10 @@ import asyncio
import json import json
import os, sys import os, sys
import time import time
from pytchat.tool.download import duplcheck from pytchat.tool.extract import duplcheck
from pytchat.tool.download import parser from pytchat.tool.extract import parser
from pytchat.tool.download.block import Block from pytchat.tool.extract.block import Block
from pytchat.tool.download.duplcheck import _dump from pytchat.tool.extract.duplcheck import _dump
def _open_file(path): def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f: with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read() return f.read()
@@ -23,7 +23,7 @@ def test_overlap():
def load_chatdata(filename): def load_chatdata(filename):
return parser.parse( return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename)) json.loads(_open_file("tests/testdata/extarct_duplcheck/overlap/"+filename))
)[1] )[1]
blocks = ( blocks = (
@@ -54,7 +54,7 @@ def test_duplicate_head():
def load_chatdata(filename): def load_chatdata(filename):
return parser.parse( return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename)) json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
)[1] )[1]
""" """
@@ -103,7 +103,7 @@ def test_duplicate_tail():
""" """
def load_chatdata(filename): def load_chatdata(filename):
return parser.parse( return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename)) json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
)[1] )[1]
#chat data offsets are ignored. #chat data offsets are ignored.
blocks = ( blocks = (

View File

@@ -4,18 +4,18 @@ import json
import os, sys import os, sys
import time import time
from aioresponses import aioresponses from aioresponses import aioresponses
from pytchat.tool.download import duplcheck from pytchat.tool.extract import duplcheck
from pytchat.tool.download import parser from pytchat.tool.extract import parser
from pytchat.tool.download.block import Block from pytchat.tool.extract.block import Block
from pytchat.tool.download.patch import Patch, fill, split, set_patch from pytchat.tool.extract.patch import Patch, fill, split, set_patch
from pytchat.tool.download.duplcheck import _dump from pytchat.tool.extract.duplcheck import _dump
def _open_file(path): def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f: with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read() return f.read()
def load_chatdata(filename): def load_chatdata(filename):
return parser.parse( return parser.parse(
json.loads(_open_file("tests/testdata/dl_patch/"+filename)) json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
)[1] )[1]
@@ -25,7 +25,7 @@ def test_split_0():
~~~~~~ before ~~~~~~ ~~~~~~ before ~~~~~~
@parent_block (# = already downloaded) @parent_block (# = already fetched)
first last end first last end
|########----------------------------------------| |########----------------------------------------|
@@ -79,11 +79,11 @@ def test_split_1():
"""patch.first <= parent_block.last """patch.first <= parent_block.last
While awaiting at run()->asyncdl._fetch() While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds, fetching parent_block proceeds,
and parent.block.last exceeds patch.first. and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded, In this case, fetched patch is all discarded,
and dlworker searches other processing block again. and worker searches other processing block again.
~~~~~~ before ~~~~~~ ~~~~~~ before ~~~~~~
@@ -135,7 +135,7 @@ def test_split_2():
~~~~~~ before ~~~~~~ ~~~~~~ before ~~~~~~
@parent_block (# = already downloaded) @parent_block (# = already fetched)
first last end (before split) first last end (before split)
|########------------------------------| |########------------------------------|
@@ -163,7 +163,7 @@ def test_split_2():
first last=end | first last=end |
|#################|...... cut extra data. |#################|...... cut extra data.
^ ^
continuation : None (download complete) continuation : None (extract complete)
@fetched patch @fetched patch
|-------- patch --------| |-------- patch --------|
@@ -188,11 +188,11 @@ def test_split_none():
"""patch.last <= parent_block.last """patch.last <= parent_block.last
While awaiting at run()->asyncdl._fetch() While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds, fetching parent_block proceeds,
and parent.block.last exceeds patch.first. and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded, In this case, fetched patch is all discarded,
and dlworker searches other processing block again. and worker searches other processing block again.
~~~~~~ before ~~~~~~ ~~~~~~ before ~~~~~~