diff --git a/pytchat/tool/download/__init__.py b/pytchat/tool/extract/__init__.py similarity index 100% rename from pytchat/tool/download/__init__.py rename to pytchat/tool/extract/__init__.py diff --git a/pytchat/tool/download/asyncdl.py b/pytchat/tool/extract/asyncdl.py similarity index 97% rename from pytchat/tool/download/asyncdl.py rename to pytchat/tool/extract/asyncdl.py index 9d17692..c361754 100644 --- a/pytchat/tool/download/asyncdl.py +++ b/pytchat/tool/extract/asyncdl.py @@ -3,7 +3,7 @@ import asyncio import json from . import parser from . block import Block -from . dlworker import DownloadWorker +from . worker import ExtractWorker from . patch import Patch from ... import config from ... paramgen import arcparam @@ -79,11 +79,11 @@ def ready_blocks(video_id, duration, div, callback): _get_blocks(video_id, duration, div, callback)) return blocks -def download_patch(callback, blocks, video_id): +def fetch_patch(callback, blocks, video_id): async def _allocate_workers(): workers = [ - DownloadWorker( + ExtractWorker( fetch = _fetch, block = block, blocks = blocks, video_id = video_id ) diff --git a/pytchat/tool/download/block.py b/pytchat/tool/extract/block.py similarity index 87% rename from pytchat/tool/download/block.py rename to pytchat/tool/extract/block.py index 3ca42de..cd854e7 100644 --- a/pytchat/tool/download/block.py +++ b/pytchat/tool/extract/block.py @@ -16,9 +16,9 @@ class Block: this value increases as fetching chatdata progresses. end : int : - target videoOffsetTimeMs of last chat data for download, + target videoOffsetTimeMs of last chat data for extract, equals to first videoOffsetTimeMs of next block. - when download worker reaches this offset, stop downloading. + when extract worker reaches this offset, stop fetching. continuation : str : continuation param of last chat data. @@ -26,10 +26,10 @@ class Block: chat_data : list done : bool : - whether this block has been downloaded. + whether this block has been fetched. 
remaining : int : - remaining data to download. + remaining data to extract. equals end - last. is_last : bool : diff --git a/pytchat/tool/download/duplcheck.py b/pytchat/tool/extract/duplcheck.py similarity index 100% rename from pytchat/tool/download/duplcheck.py rename to pytchat/tool/extract/duplcheck.py diff --git a/pytchat/tool/download/downloader.py b/pytchat/tool/extract/extractor.py similarity index 89% rename from pytchat/tool/download/downloader.py rename to pytchat/tool/extract/extractor.py index 15ab152..a8b5515 100644 --- a/pytchat/tool/download/downloader.py +++ b/pytchat/tool/extract/extractor.py @@ -8,7 +8,7 @@ from ... exceptions import InvalidVideoIdException logger = config.logger(__name__) headers=config.headers -class Downloader: +class Extractor: def __init__(self, video_id, duration, div, callback): if not isinstance(div ,int) or div < 1: raise ValueError('div must be positive integer.') @@ -44,7 +44,7 @@ class Downloader: return self def _download_blocks(self): - asyncdl.download_patch(self.callback, self.blocks, self.video_id) + asyncdl.fetch_patch(self.callback, self.blocks, self.video_id) return self def _remove_duplicate_tail(self): @@ -57,7 +57,7 @@ class Downloader: ret.extend(block.chat_data) return ret - def download(self): + def extract(self): return ( self._ready_blocks() ._remove_duplicate_head() @@ -68,7 +68,7 @@ class Downloader: ._combine() ) -def download(video_id, div = 1, callback = None, processor = None): +def extract(video_id, div = 1, callback = None, processor = None): duration = 0 try: duration = VideoInfo(video_id).get("duration") @@ -77,7 +77,7 @@ def download(video_id, div = 1, callback = None, processor = None): if duration == 0: print("video is live.") return [] - data = Downloader(video_id, duration, div, callback).download() + data = Extractor(video_id, duration, div, callback).extract() if processor is None: return data return processor.process( diff --git a/pytchat/tool/download/parser.py 
b/pytchat/tool/extract/parser.py similarity index 100% rename from pytchat/tool/download/parser.py rename to pytchat/tool/extract/parser.py diff --git a/pytchat/tool/download/patch.py b/pytchat/tool/extract/patch.py similarity index 96% rename from pytchat/tool/download/patch.py rename to pytchat/tool/extract/patch.py index 2305118..83a2e6d 100644 --- a/pytchat/tool/download/patch.py +++ b/pytchat/tool/extract/patch.py @@ -5,7 +5,7 @@ from typing import NamedTuple class Patch(NamedTuple): """ Patch represents chunk of chat data - which is fetched by asyncdl.download_patch._fetch(). + which is fetched by asyncdl.fetch_patch._fetch(). """ chats : list = [] continuation : str = None diff --git a/pytchat/tool/download/dlworker.py b/pytchat/tool/extract/worker.py similarity index 87% rename from pytchat/tool/download/dlworker.py rename to pytchat/tool/extract/worker.py index 5946bdf..e75a86d 100644 --- a/pytchat/tool/download/dlworker.py +++ b/pytchat/tool/extract/worker.py @@ -3,17 +3,17 @@ from . block import Block from . patch import Patch, fill, split from ... paramgen import arcparam -class DownloadWorker: +class ExtractWorker: """ - DownloadWorker associates a download session with a block. + ExtractWorker associates a fetch session with a block. - When the dlworker finishes downloading, the block - being downloaded is splitted and assigned the free dlworker. + When the worker finishes fetching, the block + being fetched is split and assigned to the free worker. Parameter ---------- fetch : func : - download function of asyncdl + fetch function of asyncdl block : Block : Block object that includes chat_data @@ -40,7 +40,7 @@ class DownloadWorker: patch = await self.fetch( self.block.continuation, session) if patch.continuation is None: - """TODO : make the dlworker assigned to the last block + """TODO : make the worker assigned to the last block to work more than twice as possible. 
""" break @@ -50,7 +50,7 @@ class DownloadWorker: else: fill(self.block, patch) if self.block.continuation is None: - """finished downloading this block """ + """finished fetching this block """ self.block.done = True self.block = _search_new_block(self) diff --git a/pytchat/tool/mining/asyncdl.py b/pytchat/tool/mining/asyncdl.py index fd62b56..f2211c5 100644 --- a/pytchat/tool/mining/asyncdl.py +++ b/pytchat/tool/mining/asyncdl.py @@ -4,7 +4,7 @@ import asyncio import json from . import parser from . block import Block -from . dlworker import DownloadWorker +from . worker import ExtractWorker from . patch import Patch from ... import config from ... paramgen import arcparam_mining as arcparam @@ -84,11 +84,11 @@ def ready_blocks(video_id, duration, div, callback): _get_blocks(video_id, duration, div, callback)) return blocks -def download_patch(callback, blocks, video_id): +def fetch_patch(callback, blocks, video_id): async def _allocate_workers(): workers = [ - DownloadWorker( + ExtractWorker( fetch = _fetch, block = block, blocks = blocks, video_id = video_id ) diff --git a/pytchat/tool/mining/block.py b/pytchat/tool/mining/block.py index 9767510..40c95d1 100644 --- a/pytchat/tool/mining/block.py +++ b/pytchat/tool/mining/block.py @@ -16,9 +16,9 @@ class Block: this value increases as fetching chatdata progresses. end : int : - target videoOffsetTimeMs of last chat data for download, + target videoOffsetTimeMs of last chat data for extract, equals to first videoOffsetTimeMs of next block. - when download worker reaches this offset, stop downloading. + when extract worker reaches this offset, stop fetching. continuation : str : continuation param of last chat data. @@ -26,10 +26,10 @@ class Block: chat_data : list done : bool : - whether this block has been downloaded. + whether this block has been fetched. remaining : int : - remaining data to download. + remaining data to extract. equals end - last. 
is_last : bool : diff --git a/pytchat/tool/mining/patch.py b/pytchat/tool/mining/patch.py index 186cc82..7666a52 100644 --- a/pytchat/tool/mining/patch.py +++ b/pytchat/tool/mining/patch.py @@ -5,7 +5,7 @@ from typing import NamedTuple class Patch(NamedTuple): """ Patch represents chunk of chat data - which is fetched by asyncdl.download_patch._fetch(). + which is fetched by asyncdl.fetch_patch._fetch(). """ chats : list = [] continuation : str = None diff --git a/pytchat/tool/mining/downloader.py b/pytchat/tool/mining/superchat_miner.py similarity index 87% rename from pytchat/tool/mining/downloader.py rename to pytchat/tool/mining/superchat_miner.py index 0df892b..d6052a8 100644 --- a/pytchat/tool/mining/downloader.py +++ b/pytchat/tool/mining/superchat_miner.py @@ -6,7 +6,7 @@ from ... exceptions import InvalidVideoIdException logger = config.logger(__name__) headers=config.headers -class Downloader: +class SuperChatMiner: def __init__(self, video_id, duration, div, callback): if not isinstance(div ,int) or div < 1: raise ValueError('div must be positive integer.') @@ -34,7 +34,7 @@ class Downloader: return self def _download_blocks(self): - asyncdl.download_patch(self.callback, self.blocks, self.video_id) + asyncdl.fetch_patch(self.callback, self.blocks, self.video_id) return self def _combine(self): @@ -43,7 +43,7 @@ class Downloader: ret.extend(block.chat_data) return ret - def download(self): + def extract(self): return ( self._ready_blocks() ._set_block_end() @@ -51,7 +51,7 @@ class Downloader: ._combine() ) -def download(video_id, div = 1, callback = None, processor = None): +def extract(video_id, div = 1, callback = None, processor = None): duration = 0 try: duration = VideoInfo(video_id).get("duration") @@ -60,7 +60,7 @@ def download(video_id, div = 1, callback = None, processor = None): if duration == 0: print("video is live.") return [] - data = Downloader(video_id, duration, div, callback).download() + data = SuperChatMiner(video_id, duration, div, 
callback).extract() if processor is None: return data return processor.process( diff --git a/pytchat/tool/mining/dlworker.py b/pytchat/tool/mining/worker.py similarity index 80% rename from pytchat/tool/mining/dlworker.py rename to pytchat/tool/mining/worker.py index f7da603..3a53e40 100644 --- a/pytchat/tool/mining/dlworker.py +++ b/pytchat/tool/mining/worker.py @@ -3,17 +3,17 @@ from . block import Block from . patch import Patch, fill from ... paramgen import arcparam INTERVAL = 1 -class DownloadWorker: +class ExtractWorker: """ - DownloadWorker associates a download session with a block. + ExtractWorker associates a fetch session with a block. - When the dlworker finishes downloading, the block - being downloaded is splitted and assigned the free dlworker. + When the worker finishes fetching, the block + being fetched is split and assigned to the free worker. Parameter ---------- fetch : func : - download function of asyncdl + fetch function of asyncdl block : Block : Block object that includes chat_data diff --git a/pytchat/util/__init__.py b/pytchat/util/__init__.py index 60be578..9b9d1ab 100644 --- a/pytchat/util/__init__.py +++ b/pytchat/util/__init__.py @@ -1,7 +1,7 @@ import requests,json,datetime from .. 
import config -def download(url): +def extract(url): _session = requests.Session() html = _session.get(url, headers=config.headers) with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S') diff --git a/tests/test_dl_asyncdl.py b/tests/test_extract_asyncdl.py similarity index 93% rename from tests/test_dl_asyncdl.py rename to tests/test_extract_asyncdl.py index c28b327..2cef81b 100644 --- a/tests/test_dl_asyncdl.py +++ b/tests/test_extract_asyncdl.py @@ -1,12 +1,12 @@ import aiohttp import asyncio import json -from pytchat.tool.download import parser +from pytchat.tool.extract import parser import sys import time from aioresponses import aioresponses from concurrent.futures import CancelledError -from pytchat.tool.download import asyncdl +from pytchat.tool.extract import asyncdl def _open_file(path): with open(path,mode ='r',encoding = 'utf-8') as f: diff --git a/tests/test_dl_duplcheck.py b/tests/test_extract_duplcheck.py similarity index 90% rename from tests/test_dl_duplcheck.py rename to tests/test_extract_duplcheck.py index 67fc00c..7984dcc 100644 --- a/tests/test_dl_duplcheck.py +++ b/tests/test_extract_duplcheck.py @@ -3,10 +3,10 @@ import asyncio import json import os, sys import time -from pytchat.tool.download import duplcheck -from pytchat.tool.download import parser -from pytchat.tool.download.block import Block -from pytchat.tool.download.duplcheck import _dump +from pytchat.tool.extract import duplcheck +from pytchat.tool.extract import parser +from pytchat.tool.extract.block import Block +from pytchat.tool.extract.duplcheck import _dump def _open_file(path): with open(path,mode ='r',encoding = 'utf-8') as f: return f.read() @@ -23,7 +23,7 @@ def test_overlap(): def load_chatdata(filename): return parser.parse( - json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename)) + json.loads(_open_file("tests/testdata/extract_duplcheck/overlap/"+filename)) )[1] blocks = ( @@ -54,7 +54,7 @@ def test_duplicate_head(): def 
load_chatdata(filename): return parser.parse( - json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename)) + json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename)) )[1] """ @@ -103,7 +103,7 @@ def test_duplicate_tail(): """ def load_chatdata(filename): return parser.parse( - json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename)) + json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename)) )[1] #chat data offsets are ignored. blocks = ( diff --git a/tests/test_dl_patch.py b/tests/test_extract_patch.py similarity index 91% rename from tests/test_dl_patch.py rename to tests/test_extract_patch.py index 4f5c208..2766497 100644 --- a/tests/test_dl_patch.py +++ b/tests/test_extract_patch.py @@ -4,18 +4,18 @@ import json import os, sys import time from aioresponses import aioresponses -from pytchat.tool.download import duplcheck -from pytchat.tool.download import parser -from pytchat.tool.download.block import Block -from pytchat.tool.download.patch import Patch, fill, split, set_patch -from pytchat.tool.download.duplcheck import _dump +from pytchat.tool.extract import duplcheck +from pytchat.tool.extract import parser +from pytchat.tool.extract.block import Block +from pytchat.tool.extract.patch import Patch, fill, split, set_patch +from pytchat.tool.extract.duplcheck import _dump def _open_file(path): with open(path,mode ='r',encoding = 'utf-8') as f: return f.read() def load_chatdata(filename): return parser.parse( - json.loads(_open_file("tests/testdata/dl_patch/"+filename)) + json.loads(_open_file("tests/testdata/fetch_patch/"+filename)) )[1] @@ -25,7 +25,7 @@ def test_split_0(): ~~~~~~ before ~~~~~~ - @parent_block (# = already downloaded) + @parent_block (# = already fetched) first last end |########----------------------------------------| @@ -79,11 +79,11 @@ def test_split_1(): """patch.first <= parent_block.last While awaiting at run()->asyncdl._fetch() - downloading parent_block proceeds, + fetching 
parent_block proceeds, and parent.block.last exceeds patch.first. In this case, fetched patch is all discarded, - and dlworker searches other processing block again. + and worker searches other processing block again. ~~~~~~ before ~~~~~~ @@ -135,7 +135,7 @@ def test_split_2(): ~~~~~~ before ~~~~~~ - @parent_block (# = already downloaded) + @parent_block (# = already fetched) first last end (before split) |########------------------------------| @@ -163,7 +163,7 @@ def test_split_2(): first last=end | |#################|...... cut extra data. ^ - continuation : None (download complete) + continuation : None (extract complete) @fetched patch |-------- patch --------| @@ -188,11 +188,11 @@ def test_split_none(): """patch.last <= parent_block.last While awaiting at run()->asyncdl._fetch() - downloading parent_block proceeds, + fetching parent_block proceeds, and parent.block.last exceeds patch.first. In this case, fetched patch is all discarded, - and dlworker searches other processing block again. + and worker searches other processing block again. 
~~~~~~ before ~~~~~~ diff --git a/tests/testdata/dl_duplcheck/head/dp0-0.json b/tests/testdata/extract_duplcheck/head/dp0-0.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-0.json rename to tests/testdata/extract_duplcheck/head/dp0-0.json diff --git a/tests/testdata/dl_duplcheck/head/dp0-1.json b/tests/testdata/extract_duplcheck/head/dp0-1.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-1.json rename to tests/testdata/extract_duplcheck/head/dp0-1.json diff --git a/tests/testdata/dl_duplcheck/head/dp0-2.json b/tests/testdata/extract_duplcheck/head/dp0-2.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-2.json rename to tests/testdata/extract_duplcheck/head/dp0-2.json diff --git a/tests/testdata/dl_duplcheck/head/dp0-3.json b/tests/testdata/extract_duplcheck/head/dp0-3.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-3.json rename to tests/testdata/extract_duplcheck/head/dp0-3.json diff --git a/tests/testdata/dl_duplcheck/head/dp0-4.json b/tests/testdata/extract_duplcheck/head/dp0-4.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-4.json rename to tests/testdata/extract_duplcheck/head/dp0-4.json diff --git a/tests/testdata/dl_duplcheck/head/dp0-5.json b/tests/testdata/extract_duplcheck/head/dp0-5.json similarity index 100% rename from tests/testdata/dl_duplcheck/head/dp0-5.json rename to tests/testdata/extract_duplcheck/head/dp0-5.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-0.json b/tests/testdata/extract_duplcheck/overlap/dp0-0.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-0.json rename to tests/testdata/extract_duplcheck/overlap/dp0-0.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-1.json b/tests/testdata/extract_duplcheck/overlap/dp0-1.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-1.json rename to 
tests/testdata/extract_duplcheck/overlap/dp0-1.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-2.json b/tests/testdata/extract_duplcheck/overlap/dp0-2.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-2.json rename to tests/testdata/extract_duplcheck/overlap/dp0-2.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-3.json b/tests/testdata/extract_duplcheck/overlap/dp0-3.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-3.json rename to tests/testdata/extract_duplcheck/overlap/dp0-3.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-4.json b/tests/testdata/extract_duplcheck/overlap/dp0-4.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-4.json rename to tests/testdata/extract_duplcheck/overlap/dp0-4.json diff --git a/tests/testdata/dl_duplcheck/overlap/dp0-5.json b/tests/testdata/extract_duplcheck/overlap/dp0-5.json similarity index 100% rename from tests/testdata/dl_duplcheck/overlap/dp0-5.json rename to tests/testdata/extract_duplcheck/overlap/dp0-5.json diff --git a/tests/testdata/dl_patch/pt0-0.json b/tests/testdata/fetch_patch/pt0-0.json similarity index 100% rename from tests/testdata/dl_patch/pt0-0.json rename to tests/testdata/fetch_patch/pt0-0.json diff --git a/tests/testdata/dl_patch/pt0-1.json b/tests/testdata/fetch_patch/pt0-1.json similarity index 100% rename from tests/testdata/dl_patch/pt0-1.json rename to tests/testdata/fetch_patch/pt0-1.json diff --git a/tests/testdata/dl_patch/pt0-3.json b/tests/testdata/fetch_patch/pt0-3.json similarity index 100% rename from tests/testdata/dl_patch/pt0-3.json rename to tests/testdata/fetch_patch/pt0-3.json diff --git a/tests/testdata/dl_patch/pt0-4.json b/tests/testdata/fetch_patch/pt0-4.json similarity index 100% rename from tests/testdata/dl_patch/pt0-4.json rename to tests/testdata/fetch_patch/pt0-4.json diff --git a/tests/testdata/dl_patch/pt0-5.json b/tests/testdata/fetch_patch/pt0-5.json similarity 
index 100% rename from tests/testdata/dl_patch/pt0-5.json rename to tests/testdata/fetch_patch/pt0-5.json