Rename modules

This commit is contained in:
taizan-hokuto
2020-02-26 22:08:36 +09:00
parent 61d4e06470
commit de35537be8
34 changed files with 61 additions and 61 deletions

View File

@@ -3,7 +3,7 @@ import asyncio
import json
from . import parser
from . block import Block
from . dlworker import DownloadWorker
from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam
@@ -79,11 +79,11 @@ def ready_blocks(video_id, duration, div, callback):
_get_blocks(video_id, duration, div, callback))
return blocks
def download_patch(callback, blocks, video_id):
def fetch_patch(callback, blocks, video_id):
async def _allocate_workers():
workers = [
DownloadWorker(
ExtractWorker(
fetch = _fetch, block = block,
blocks = blocks, video_id = video_id
)

View File

@@ -16,9 +16,9 @@ class Block:
this value increases as fetching chatdata progresses.
end : int :
target videoOffsetTimeMs of last chat data for download,
target videoOffsetTimeMs of last chat data for extract,
equal to the first videoOffsetTimeMs of the next block.
when download worker reaches this offset, stop downloading.
when extract worker reaches this offset, stop fetching.
continuation : str :
continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
chat_data : list
done : bool :
whether this block has been downloaded.
whether this block has been fetched.
remaining : int :
remaining data to download.
remaining data to extract.
equals end - last.
is_last : bool :

View File

@@ -8,7 +8,7 @@ from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers=config.headers
class Downloader:
class Extractor:
def __init__(self, video_id, duration, div, callback):
if not isinstance(div ,int) or div < 1:
raise ValueError('div must be positive integer.')
@@ -44,7 +44,7 @@ class Downloader:
return self
def _download_blocks(self):
asyncdl.download_patch(self.callback, self.blocks, self.video_id)
asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
return self
def _remove_duplicate_tail(self):
@@ -57,7 +57,7 @@ class Downloader:
ret.extend(block.chat_data)
return ret
def download(self):
def extract(self):
return (
self._ready_blocks()
._remove_duplicate_head()
@@ -68,7 +68,7 @@ class Downloader:
._combine()
)
def download(video_id, div = 1, callback = None, processor = None):
def extract(video_id, div = 1, callback = None, processor = None):
duration = 0
try:
duration = VideoInfo(video_id).get("duration")
@@ -77,7 +77,7 @@ def download(video_id, div = 1, callback = None, processor = None):
if duration == 0:
print("video is live.")
return []
data = Downloader(video_id, duration, div, callback).download()
data = Extractor(video_id, duration, div, callback).extract()
if processor is None:
return data
return processor.process(

View File

@@ -5,7 +5,7 @@ from typing import NamedTuple
class Patch(NamedTuple):
"""
Patch represents chunk of chat data
which is fetched by asyncdl.download_patch._fetch().
which is fetched by asyncdl.fetch_patch._fetch().
"""
chats : list = []
continuation : str = None

View File

@@ -3,17 +3,17 @@ from . block import Block
from . patch import Patch, fill, split
from ... paramgen import arcparam
class DownloadWorker:
class ExtractWorker:
"""
DownloadWorker associates a download session with a block.
ExtractWorker associates a download session with a block.
When the dlworker finishes downloading, the block
being downloaded is splitted and assigned the free dlworker.
When the worker finishes fetching, the block
being fetched is split and assigned to the free worker.
Parameter
----------
fetch : func :
download function of asyncdl
extract function of asyncdl
block : Block :
Block object that includes chat_data
@@ -40,7 +40,7 @@ class DownloadWorker:
patch = await self.fetch(
self.block.continuation, session)
if patch.continuation is None:
"""TODO : make the dlworker assigned to the last block
"""TODO : make the worker assigned to the last block
to work more than twice as possible.
"""
break
@@ -50,7 +50,7 @@ class DownloadWorker:
else:
fill(self.block, patch)
if self.block.continuation is None:
"""finished downloading this block """
"""finished fetching this block """
self.block.done = True
self.block = _search_new_block(self)

View File

@@ -4,7 +4,7 @@ import asyncio
import json
from . import parser
from . block import Block
from . dlworker import DownloadWorker
from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam_mining as arcparam
@@ -84,11 +84,11 @@ def ready_blocks(video_id, duration, div, callback):
_get_blocks(video_id, duration, div, callback))
return blocks
def download_patch(callback, blocks, video_id):
def fetch_patch(callback, blocks, video_id):
async def _allocate_workers():
workers = [
DownloadWorker(
ExtractWorker(
fetch = _fetch, block = block,
blocks = blocks, video_id = video_id
)

View File

@@ -16,9 +16,9 @@ class Block:
this value increases as fetching chatdata progresses.
end : int :
target videoOffsetTimeMs of last chat data for download,
target videoOffsetTimeMs of last chat data for extract,
equal to the first videoOffsetTimeMs of the next block.
when download worker reaches this offset, stop downloading.
when extract worker reaches this offset, stop fetching.
continuation : str :
continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
chat_data : list
done : bool :
whether this block has been downloaded.
whether this block has been fetched.
remaining : int :
remaining data to download.
remaining data to extract.
equals end - last.
is_last : bool :

View File

@@ -5,7 +5,7 @@ from typing import NamedTuple
class Patch(NamedTuple):
"""
Patch represents chunk of chat data
which is fetched by asyncdl.download_patch._fetch().
which is fetched by asyncdl.fetch_patch._fetch().
"""
chats : list = []
continuation : str = None

View File

@@ -6,7 +6,7 @@ from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers=config.headers
class Downloader:
class SuperChatMiner:
def __init__(self, video_id, duration, div, callback):
if not isinstance(div ,int) or div < 1:
raise ValueError('div must be positive integer.')
@@ -34,7 +34,7 @@ class Downloader:
return self
def _download_blocks(self):
asyncdl.download_patch(self.callback, self.blocks, self.video_id)
asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
return self
def _combine(self):
@@ -43,7 +43,7 @@ class Downloader:
ret.extend(block.chat_data)
return ret
def download(self):
def extract(self):
return (
self._ready_blocks()
._set_block_end()
@@ -51,7 +51,7 @@ class Downloader:
._combine()
)
def download(video_id, div = 1, callback = None, processor = None):
def extract(video_id, div = 1, callback = None, processor = None):
duration = 0
try:
duration = VideoInfo(video_id).get("duration")
@@ -60,7 +60,7 @@ def download(video_id, div = 1, callback = None, processor = None):
if duration == 0:
print("video is live.")
return []
data = Downloader(video_id, duration, div, callback).download()
data = SuperChatMiner(video_id, duration, div, callback).extract()
if processor is None:
return data
return processor.process(

View File

@@ -3,17 +3,17 @@ from . block import Block
from . patch import Patch, fill
from ... paramgen import arcparam
INTERVAL = 1
class DownloadWorker:
class ExtractWorker:
"""
DownloadWorker associates a download session with a block.
ExtractWorker associates a download session with a block.
When the dlworker finishes downloading, the block
being downloaded is splitted and assigned the free dlworker.
When the worker finishes fetching, the block
being fetched is split and assigned to the free worker.
Parameter
----------
fetch : func :
download function of asyncdl
extract function of asyncdl
block : Block :
Block object that includes chat_data

View File

@@ -1,7 +1,7 @@
import requests,json,datetime
from .. import config
def download(url):
def extract(url):
_session = requests.Session()
html = _session.get(url, headers=config.headers)
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')

View File

@@ -1,12 +1,12 @@
import aiohttp
import asyncio
import json
from pytchat.tool.download import parser
from pytchat.tool.extract import parser
import sys
import time
from aioresponses import aioresponses
from concurrent.futures import CancelledError
from pytchat.tool.download import asyncdl
from pytchat.tool.extract import asyncdl
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:

View File

@@ -3,10 +3,10 @@ import asyncio
import json
import os, sys
import time
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.duplcheck import _dump
from pytchat.tool.extract import duplcheck
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
@@ -23,7 +23,7 @@ def test_overlap():
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
json.loads(_open_file("tests/testdata/extarct_duplcheck/overlap/"+filename))
)[1]
blocks = (
@@ -54,7 +54,7 @@ def test_duplicate_head():
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
)[1]
"""
@@ -103,7 +103,7 @@ def test_duplicate_tail():
"""
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
)[1]
#chat data offsets are ignored.
blocks = (

View File

@@ -4,18 +4,18 @@ import json
import os, sys
import time
from aioresponses import aioresponses
from pytchat.tool.download import duplcheck
from pytchat.tool.download import parser
from pytchat.tool.download.block import Block
from pytchat.tool.download.patch import Patch, fill, split, set_patch
from pytchat.tool.download.duplcheck import _dump
from pytchat.tool.extract import duplcheck
from pytchat.tool.extract import parser
from pytchat.tool.extract.block import Block
from pytchat.tool.extract.patch import Patch, fill, split, set_patch
from pytchat.tool.extract.duplcheck import _dump
def _open_file(path):
with open(path,mode ='r',encoding = 'utf-8') as f:
return f.read()
def load_chatdata(filename):
return parser.parse(
json.loads(_open_file("tests/testdata/dl_patch/"+filename))
json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
)[1]
@@ -25,7 +25,7 @@ def test_split_0():
~~~~~~ before ~~~~~~
@parent_block (# = already downloaded)
@parent_block (# = already fetched)
first last end
|########----------------------------------------|
@@ -79,11 +79,11 @@ def test_split_1():
"""patch.first <= parent_block.last
While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds,
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and dlworker searches other processing block again.
and worker searches other processing block again.
~~~~~~ before ~~~~~~
@@ -135,7 +135,7 @@ def test_split_2():
~~~~~~ before ~~~~~~
@parent_block (# = already downloaded)
@parent_block (# = already fetched)
first last end (before split)
|########------------------------------|
@@ -163,7 +163,7 @@ def test_split_2():
first last=end |
|#################|...... cut extra data.
^
continuation : None (download complete)
continuation : None (extract complete)
@fetched patch
|-------- patch --------|
@@ -188,11 +188,11 @@ def test_split_none():
"""patch.last <= parent_block.last
While awaiting at run()->asyncdl._fetch()
downloading parent_block proceeds,
fetching parent_block proceeds,
and parent.block.last exceeds patch.first.
In this case, fetched patch is all discarded,
and dlworker searches other processing block again.
and worker searches other processing block again.
~~~~~~ before ~~~~~~