Rename modules
@@ -3,7 +3,7 @@ import asyncio
 import json
 from . import parser
 from . block import Block
-from . dlworker import DownloadWorker
+from . worker import ExtractWorker
 from . patch import Patch
 from ... import config
 from ... paramgen import arcparam
@@ -79,11 +79,11 @@ def ready_blocks(video_id, duration, div, callback):
 _get_blocks(video_id, duration, div, callback))
 return blocks

-def download_patch(callback, blocks, video_id):
+def fetch_patch(callback, blocks, video_id):

 async def _allocate_workers():
 workers = [
-DownloadWorker(
+ExtractWorker(
 fetch = _fetch, block = block,
 blocks = blocks, video_id = video_id
 )
@@ -16,9 +16,9 @@ class Block:
 this value increases as fetching chatdata progresses.

 end : int :
-target videoOffsetTimeMs of last chat data for download,
+target videoOffsetTimeMs of last chat data for extract,
 equals to first videoOffsetTimeMs of next block.
-when download worker reaches this offset, stop downloading.
+when extract worker reaches this offset, stop fetching.

 continuation : str :
 continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
 chat_data : list

 done : bool :
-whether this block has been downloaded.
+whether this block has been fetched.

 remaining : int :
-remaining data to download.
+remaining data to extract.
 equals end - last.

 is_last : bool :
@@ -8,7 +8,7 @@ from ... exceptions import InvalidVideoIdException
 logger = config.logger(__name__)
 headers=config.headers

-class Downloader:
+class Extractor:
 def __init__(self, video_id, duration, div, callback):
 if not isinstance(div ,int) or div < 1:
 raise ValueError('div must be positive integer.')
@@ -44,7 +44,7 @@ class Downloader:
 return self

 def _download_blocks(self):
-asyncdl.download_patch(self.callback, self.blocks, self.video_id)
+asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
 return self

 def _remove_duplicate_tail(self):
@@ -57,7 +57,7 @@ class Downloader:
 ret.extend(block.chat_data)
 return ret

-def download(self):
+def extract(self):
 return (
 self._ready_blocks()
 ._remove_duplicate_head()
@@ -68,7 +68,7 @@ class Downloader:
 ._combine()
 )

-def download(video_id, div = 1, callback = None, processor = None):
+def extract(video_id, div = 1, callback = None, processor = None):
 duration = 0
 try:
 duration = VideoInfo(video_id).get("duration")
@@ -77,7 +77,7 @@ def download(video_id, div = 1, callback = None, processor = None):
 if duration == 0:
 print("video is live.")
 return []
-data = Downloader(video_id, duration, div, callback).download()
+data = Extractor(video_id, duration, div, callback).extract()
 if processor is None:
 return data
 return processor.process(
@@ -5,7 +5,7 @@ from typing import NamedTuple
 class Patch(NamedTuple):
 """
 Patch represents chunk of chat data
-which is fetched by asyncdl.download_patch._fetch().
+which is fetched by asyncdl.fetch_patch._fetch().
 """
 chats : list = []
 continuation : str = None
@@ -3,17 +3,17 @@ from . block import Block
 from . patch import Patch, fill, split
 from ... paramgen import arcparam

-class DownloadWorker:
+class ExtractWorker:
 """
-DownloadWorker associates a download session with a block.
+ExtractWorker associates a download session with a block.

-When the dlworker finishes downloading, the block
-being downloaded is splitted and assigned the free dlworker.
+When the worker finishes fetching, the block
+being fetched is splitted and assigned the free worker.

 Parameter
 ----------
 fetch : func :
-download function of asyncdl
+extract function of asyncdl

 block : Block :
 Block object that includes chat_data
@@ -40,7 +40,7 @@ class DownloadWorker:
 patch = await self.fetch(
 self.block.continuation, session)
 if patch.continuation is None:
-"""TODO : make the dlworker assigned to the last block
+"""TODO : make the worker assigned to the last block
 to work more than twice as possible.
 """
 break
@@ -50,7 +50,7 @@ class DownloadWorker:
 else:
 fill(self.block, patch)
 if self.block.continuation is None:
-"""finished downloading this block """
+"""finished fetching this block """
 self.block.done = True
 self.block = _search_new_block(self)

@@ -4,7 +4,7 @@ import asyncio
 import json
 from . import parser
 from . block import Block
-from . dlworker import DownloadWorker
+from . worker import ExtractWorker
 from . patch import Patch
 from ... import config
 from ... paramgen import arcparam_mining as arcparam
@@ -84,11 +84,11 @@ def ready_blocks(video_id, duration, div, callback):
 _get_blocks(video_id, duration, div, callback))
 return blocks

-def download_patch(callback, blocks, video_id):
+def fetch_patch(callback, blocks, video_id):

 async def _allocate_workers():
 workers = [
-DownloadWorker(
+ExtractWorker(
 fetch = _fetch, block = block,
 blocks = blocks, video_id = video_id
 )
@@ -16,9 +16,9 @@ class Block:
 this value increases as fetching chatdata progresses.

 end : int :
-target videoOffsetTimeMs of last chat data for download,
+target videoOffsetTimeMs of last chat data for extract,
 equals to first videoOffsetTimeMs of next block.
-when download worker reaches this offset, stop downloading.
+when extract worker reaches this offset, stop fetching.

 continuation : str :
 continuation param of last chat data.
@@ -26,10 +26,10 @@ class Block:
 chat_data : list

 done : bool :
-whether this block has been downloaded.
+whether this block has been fetched.

 remaining : int :
-remaining data to download.
+remaining data to extract.
 equals end - last.

 is_last : bool :
@@ -5,7 +5,7 @@ from typing import NamedTuple
 class Patch(NamedTuple):
 """
 Patch represents chunk of chat data
-which is fetched by asyncdl.download_patch._fetch().
+which is fetched by asyncdl.fetch_patch._fetch().
 """
 chats : list = []
 continuation : str = None
@@ -6,7 +6,7 @@ from ... exceptions import InvalidVideoIdException
 logger = config.logger(__name__)
 headers=config.headers

-class Downloader:
+class SuperChatMiner:
 def __init__(self, video_id, duration, div, callback):
 if not isinstance(div ,int) or div < 1:
 raise ValueError('div must be positive integer.')
@@ -34,7 +34,7 @@ class Downloader:
 return self

 def _download_blocks(self):
-asyncdl.download_patch(self.callback, self.blocks, self.video_id)
+asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
 return self

 def _combine(self):
@@ -43,7 +43,7 @@ class Downloader:
 ret.extend(block.chat_data)
 return ret

-def download(self):
+def extract(self):
 return (
 self._ready_blocks()
 ._set_block_end()
@@ -51,7 +51,7 @@ class Downloader:
 ._combine()
 )

-def download(video_id, div = 1, callback = None, processor = None):
+def extract(video_id, div = 1, callback = None, processor = None):
 duration = 0
 try:
 duration = VideoInfo(video_id).get("duration")
@@ -60,7 +60,7 @@ def download(video_id, div = 1, callback = None, processor = None):
 if duration == 0:
 print("video is live.")
 return []
-data = Downloader(video_id, duration, div, callback).download()
+data = SuperChatMiner(video_id, duration, div, callback).extract()
 if processor is None:
 return data
 return processor.process(
@@ -3,17 +3,17 @@ from . block import Block
 from . patch import Patch, fill
 from ... paramgen import arcparam
 INTERVAL = 1
-class DownloadWorker:
+class ExtractWorker:
 """
-DownloadWorker associates a download session with a block.
+ExtractWorker associates a download session with a block.

-When the dlworker finishes downloading, the block
-being downloaded is splitted and assigned the free dlworker.
+When the worker finishes fetching, the block
+being fetched is splitted and assigned the free worker.

 Parameter
 ----------
 fetch : func :
-download function of asyncdl
+extract function of asyncdl

 block : Block :
 Block object that includes chat_data
@@ -1,7 +1,7 @@
 import requests,json,datetime
 from .. import config

-def download(url):
+def extract(url):
 _session = requests.Session()
 html = _session.get(url, headers=config.headers)
 with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
@@ -1,12 +1,12 @@
 import aiohttp
 import asyncio
 import json
-from pytchat.tool.download import parser
+from pytchat.tool.extract import parser
 import sys
 import time
 from aioresponses import aioresponses
 from concurrent.futures import CancelledError
-from pytchat.tool.download import asyncdl
+from pytchat.tool.extract import asyncdl

 def _open_file(path):
 with open(path,mode ='r',encoding = 'utf-8') as f:
@@ -3,10 +3,10 @@ import asyncio
 import json
 import os, sys
 import time
-from pytchat.tool.download import duplcheck
-from pytchat.tool.download import parser
-from pytchat.tool.download.block import Block
-from pytchat.tool.download.duplcheck import _dump
+from pytchat.tool.extract import duplcheck
+from pytchat.tool.extract import parser
+from pytchat.tool.extract.block import Block
+from pytchat.tool.extract.duplcheck import _dump
 def _open_file(path):
 with open(path,mode ='r',encoding = 'utf-8') as f:
 return f.read()
@@ -23,7 +23,7 @@ def test_overlap():

 def load_chatdata(filename):
 return parser.parse(
-json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
+json.loads(_open_file("tests/testdata/extarct_duplcheck/overlap/"+filename))
 )[1]

 blocks = (
@@ -54,7 +54,7 @@ def test_duplicate_head():

 def load_chatdata(filename):
 return parser.parse(
-json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
+json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
 )[1]

 """
@@ -103,7 +103,7 @@ def test_duplicate_tail():
 """
 def load_chatdata(filename):
 return parser.parse(
-json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
+json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
 )[1]
 #chat data offsets are ignored.
 blocks = (
@@ -4,18 +4,18 @@ import json
 import os, sys
 import time
 from aioresponses import aioresponses
-from pytchat.tool.download import duplcheck
-from pytchat.tool.download import parser
-from pytchat.tool.download.block import Block
-from pytchat.tool.download.patch import Patch, fill, split, set_patch
-from pytchat.tool.download.duplcheck import _dump
+from pytchat.tool.extract import duplcheck
+from pytchat.tool.extract import parser
+from pytchat.tool.extract.block import Block
+from pytchat.tool.extract.patch import Patch, fill, split, set_patch
+from pytchat.tool.extract.duplcheck import _dump
 def _open_file(path):
 with open(path,mode ='r',encoding = 'utf-8') as f:
 return f.read()

 def load_chatdata(filename):
 return parser.parse(
-json.loads(_open_file("tests/testdata/dl_patch/"+filename))
+json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
 )[1]

@@ -25,7 +25,7 @@ def test_split_0():

 ~~~~~~ before ~~~~~~

-@parent_block (# = already downloaded)
+@parent_block (# = already fetched)

 first last end
 |########----------------------------------------|
@@ -79,11 +79,11 @@ def test_split_1():
 """patch.first <= parent_block.last

 While awaiting at run()->asyncdl._fetch()
-downloading parent_block proceeds,
+fetching parent_block proceeds,
 and parent.block.last exceeds patch.first.

 In this case, fetched patch is all discarded,
-and dlworker searches other processing block again.
+and worker searches other processing block again.

 ~~~~~~ before ~~~~~~

@@ -135,7 +135,7 @@ def test_split_2():

 ~~~~~~ before ~~~~~~

-@parent_block (# = already downloaded)
+@parent_block (# = already fetched)
 first last end (before split)
 |########------------------------------|

@@ -163,7 +163,7 @@ def test_split_2():
 first last=end |
 |#################|...... cut extra data.
 ^
-continuation : None (download complete)
+continuation : None (extract complete)

 @fetched patch
 |-------- patch --------|
@@ -188,11 +188,11 @@ def test_split_none():
 """patch.last <= parent_block.last

 While awaiting at run()->asyncdl._fetch()
-downloading parent_block proceeds,
+fetching parent_block proceeds,
 and parent.block.last exceeds patch.first.

 In this case, fetched patch is all discarded,
-and dlworker searches other processing block again.
+and worker searches other processing block again.

 ~~~~~~ before ~~~~~~

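Usage sketch (not part of the commit): a minimal call to the renamed entry point, assuming extract() is importable from the pytchat.tool.extract package as the test imports above suggest; the video id and div value are placeholders.

    # Sketch only: import path and video id are assumptions, not confirmed by this diff.
    from pytchat.tool.extract import extract

    # div splits the archive into blocks fetched concurrently; it must be a
    # positive integer (see the ValueError check above). With no processor,
    # extract() returns the combined chat_data list.
    chat_data = extract("abcdefghijk", div=4)
    print(len(chat_data))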