Rename modules
This commit is contained in:
@@ -3,7 +3,7 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
from . import parser
|
from . import parser
|
||||||
from . block import Block
|
from . block import Block
|
||||||
from . dlworker import DownloadWorker
|
from . worker import ExtractWorker
|
||||||
from . patch import Patch
|
from . patch import Patch
|
||||||
from ... import config
|
from ... import config
|
||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
@@ -79,11 +79,11 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
_get_blocks(video_id, duration, div, callback))
|
_get_blocks(video_id, duration, div, callback))
|
||||||
return blocks
|
return blocks
|
||||||
|
|
||||||
def download_patch(callback, blocks, video_id):
|
def fetch_patch(callback, blocks, video_id):
|
||||||
|
|
||||||
async def _allocate_workers():
|
async def _allocate_workers():
|
||||||
workers = [
|
workers = [
|
||||||
DownloadWorker(
|
ExtractWorker(
|
||||||
fetch = _fetch, block = block,
|
fetch = _fetch, block = block,
|
||||||
blocks = blocks, video_id = video_id
|
blocks = blocks, video_id = video_id
|
||||||
)
|
)
|
||||||
@@ -16,9 +16,9 @@ class Block:
|
|||||||
this value increases as fetching chatdata progresses.
|
this value increases as fetching chatdata progresses.
|
||||||
|
|
||||||
end : int :
|
end : int :
|
||||||
target videoOffsetTimeMs of last chat data for download,
|
target videoOffsetTimeMs of last chat data for extract,
|
||||||
equal to the first videoOffsetTimeMs of the next block.
|
equal to the first videoOffsetTimeMs of the next block.
|
||||||
when download worker reaches this offset, stop downloading.
|
when extract worker reaches this offset, stop fetching.
|
||||||
|
|
||||||
continuation : str :
|
continuation : str :
|
||||||
continuation param of last chat data.
|
continuation param of last chat data.
|
||||||
@@ -26,10 +26,10 @@ class Block:
|
|||||||
chat_data : list
|
chat_data : list
|
||||||
|
|
||||||
done : bool :
|
done : bool :
|
||||||
whether this block has been downloaded.
|
whether this block has been fetched.
|
||||||
|
|
||||||
remaining : int :
|
remaining : int :
|
||||||
remaining data to download.
|
remaining data to extract.
|
||||||
equals end - last.
|
equals end - last.
|
||||||
|
|
||||||
is_last : bool :
|
is_last : bool :
|
||||||
@@ -8,7 +8,7 @@ from ... exceptions import InvalidVideoIdException
|
|||||||
logger = config.logger(__name__)
|
logger = config.logger(__name__)
|
||||||
headers=config.headers
|
headers=config.headers
|
||||||
|
|
||||||
class Downloader:
|
class Extractor:
|
||||||
def __init__(self, video_id, duration, div, callback):
|
def __init__(self, video_id, duration, div, callback):
|
||||||
if not isinstance(div ,int) or div < 1:
|
if not isinstance(div ,int) or div < 1:
|
||||||
raise ValueError('div must be positive integer.')
|
raise ValueError('div must be positive integer.')
|
||||||
@@ -44,7 +44,7 @@ class Downloader:
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def _download_blocks(self):
|
def _download_blocks(self):
|
||||||
asyncdl.download_patch(self.callback, self.blocks, self.video_id)
|
asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def _remove_duplicate_tail(self):
|
def _remove_duplicate_tail(self):
|
||||||
@@ -57,7 +57,7 @@ class Downloader:
|
|||||||
ret.extend(block.chat_data)
|
ret.extend(block.chat_data)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def download(self):
|
def extract(self):
|
||||||
return (
|
return (
|
||||||
self._ready_blocks()
|
self._ready_blocks()
|
||||||
._remove_duplicate_head()
|
._remove_duplicate_head()
|
||||||
@@ -68,7 +68,7 @@ class Downloader:
|
|||||||
._combine()
|
._combine()
|
||||||
)
|
)
|
||||||
|
|
||||||
def download(video_id, div = 1, callback = None, processor = None):
|
def extract(video_id, div = 1, callback = None, processor = None):
|
||||||
duration = 0
|
duration = 0
|
||||||
try:
|
try:
|
||||||
duration = VideoInfo(video_id).get("duration")
|
duration = VideoInfo(video_id).get("duration")
|
||||||
@@ -77,7 +77,7 @@ def download(video_id, div = 1, callback = None, processor = None):
|
|||||||
if duration == 0:
|
if duration == 0:
|
||||||
print("video is live.")
|
print("video is live.")
|
||||||
return []
|
return []
|
||||||
data = Downloader(video_id, duration, div, callback).download()
|
data = Extractor(video_id, duration, div, callback).extract()
|
||||||
if processor is None:
|
if processor is None:
|
||||||
return data
|
return data
|
||||||
return processor.process(
|
return processor.process(
|
||||||
@@ -5,7 +5,7 @@ from typing import NamedTuple
|
|||||||
class Patch(NamedTuple):
|
class Patch(NamedTuple):
|
||||||
"""
|
"""
|
||||||
Patch represents chunk of chat data
|
Patch represents chunk of chat data
|
||||||
which is fetched by asyncdl.download_patch._fetch().
|
which is fetched by asyncdl.fetch_patch._fetch().
|
||||||
"""
|
"""
|
||||||
chats : list = []
|
chats : list = []
|
||||||
continuation : str = None
|
continuation : str = None
|
||||||
@@ -3,17 +3,17 @@ from . block import Block
|
|||||||
from . patch import Patch, fill, split
|
from . patch import Patch, fill, split
|
||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
|
|
||||||
class DownloadWorker:
|
class ExtractWorker:
|
||||||
"""
|
"""
|
||||||
DownloadWorker associates a download session with a block.
|
ExtractWorker associates a download session with a block.
|
||||||
|
|
||||||
When the dlworker finishes downloading, the block
|
When the worker finishes fetching, the block
|
||||||
being downloaded is split and assigned to the free dlworker.
|
being fetched is split and assigned to the free worker.
|
||||||
|
|
||||||
Parameter
|
Parameter
|
||||||
----------
|
----------
|
||||||
fetch : func :
|
fetch : func :
|
||||||
download function of asyncdl
|
extract function of asyncdl
|
||||||
|
|
||||||
block : Block :
|
block : Block :
|
||||||
Block object that includes chat_data
|
Block object that includes chat_data
|
||||||
@@ -40,7 +40,7 @@ class DownloadWorker:
|
|||||||
patch = await self.fetch(
|
patch = await self.fetch(
|
||||||
self.block.continuation, session)
|
self.block.continuation, session)
|
||||||
if patch.continuation is None:
|
if patch.continuation is None:
|
||||||
"""TODO : make the dlworker assigned to the last block
|
"""TODO : make the worker assigned to the last block
|
||||||
to work more than twice as possible.
|
to work more than twice as possible.
|
||||||
"""
|
"""
|
||||||
break
|
break
|
||||||
@@ -50,7 +50,7 @@ class DownloadWorker:
|
|||||||
else:
|
else:
|
||||||
fill(self.block, patch)
|
fill(self.block, patch)
|
||||||
if self.block.continuation is None:
|
if self.block.continuation is None:
|
||||||
"""finished downloading this block """
|
"""finished fetching this block """
|
||||||
self.block.done = True
|
self.block.done = True
|
||||||
self.block = _search_new_block(self)
|
self.block = _search_new_block(self)
|
||||||
|
|
||||||
@@ -4,7 +4,7 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
from . import parser
|
from . import parser
|
||||||
from . block import Block
|
from . block import Block
|
||||||
from . dlworker import DownloadWorker
|
from . worker import ExtractWorker
|
||||||
from . patch import Patch
|
from . patch import Patch
|
||||||
from ... import config
|
from ... import config
|
||||||
from ... paramgen import arcparam_mining as arcparam
|
from ... paramgen import arcparam_mining as arcparam
|
||||||
@@ -84,11 +84,11 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
_get_blocks(video_id, duration, div, callback))
|
_get_blocks(video_id, duration, div, callback))
|
||||||
return blocks
|
return blocks
|
||||||
|
|
||||||
def download_patch(callback, blocks, video_id):
|
def fetch_patch(callback, blocks, video_id):
|
||||||
|
|
||||||
async def _allocate_workers():
|
async def _allocate_workers():
|
||||||
workers = [
|
workers = [
|
||||||
DownloadWorker(
|
ExtractWorker(
|
||||||
fetch = _fetch, block = block,
|
fetch = _fetch, block = block,
|
||||||
blocks = blocks, video_id = video_id
|
blocks = blocks, video_id = video_id
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ class Block:
|
|||||||
this value increases as fetching chatdata progresses.
|
this value increases as fetching chatdata progresses.
|
||||||
|
|
||||||
end : int :
|
end : int :
|
||||||
target videoOffsetTimeMs of last chat data for download,
|
target videoOffsetTimeMs of last chat data for extract,
|
||||||
equal to the first videoOffsetTimeMs of the next block.
|
equal to the first videoOffsetTimeMs of the next block.
|
||||||
when download worker reaches this offset, stop downloading.
|
when extract worker reaches this offset, stop fetching.
|
||||||
|
|
||||||
continuation : str :
|
continuation : str :
|
||||||
continuation param of last chat data.
|
continuation param of last chat data.
|
||||||
@@ -26,10 +26,10 @@ class Block:
|
|||||||
chat_data : list
|
chat_data : list
|
||||||
|
|
||||||
done : bool :
|
done : bool :
|
||||||
whether this block has been downloaded.
|
whether this block has been fetched.
|
||||||
|
|
||||||
remaining : int :
|
remaining : int :
|
||||||
remaining data to download.
|
remaining data to extract.
|
||||||
equals end - last.
|
equals end - last.
|
||||||
|
|
||||||
is_last : bool :
|
is_last : bool :
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import NamedTuple
|
|||||||
class Patch(NamedTuple):
|
class Patch(NamedTuple):
|
||||||
"""
|
"""
|
||||||
Patch represents chunk of chat data
|
Patch represents chunk of chat data
|
||||||
which is fetched by asyncdl.download_patch._fetch().
|
which is fetched by asyncdl.fetch_patch._fetch().
|
||||||
"""
|
"""
|
||||||
chats : list = []
|
chats : list = []
|
||||||
continuation : str = None
|
continuation : str = None
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from ... exceptions import InvalidVideoIdException
|
|||||||
logger = config.logger(__name__)
|
logger = config.logger(__name__)
|
||||||
headers=config.headers
|
headers=config.headers
|
||||||
|
|
||||||
class Downloader:
|
class SuperChatMiner:
|
||||||
def __init__(self, video_id, duration, div, callback):
|
def __init__(self, video_id, duration, div, callback):
|
||||||
if not isinstance(div ,int) or div < 1:
|
if not isinstance(div ,int) or div < 1:
|
||||||
raise ValueError('div must be positive integer.')
|
raise ValueError('div must be positive integer.')
|
||||||
@@ -34,7 +34,7 @@ class Downloader:
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def _download_blocks(self):
|
def _download_blocks(self):
|
||||||
asyncdl.download_patch(self.callback, self.blocks, self.video_id)
|
asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def _combine(self):
|
def _combine(self):
|
||||||
@@ -43,7 +43,7 @@ class Downloader:
|
|||||||
ret.extend(block.chat_data)
|
ret.extend(block.chat_data)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def download(self):
|
def extract(self):
|
||||||
return (
|
return (
|
||||||
self._ready_blocks()
|
self._ready_blocks()
|
||||||
._set_block_end()
|
._set_block_end()
|
||||||
@@ -51,7 +51,7 @@ class Downloader:
|
|||||||
._combine()
|
._combine()
|
||||||
)
|
)
|
||||||
|
|
||||||
def download(video_id, div = 1, callback = None, processor = None):
|
def extract(video_id, div = 1, callback = None, processor = None):
|
||||||
duration = 0
|
duration = 0
|
||||||
try:
|
try:
|
||||||
duration = VideoInfo(video_id).get("duration")
|
duration = VideoInfo(video_id).get("duration")
|
||||||
@@ -60,7 +60,7 @@ def download(video_id, div = 1, callback = None, processor = None):
|
|||||||
if duration == 0:
|
if duration == 0:
|
||||||
print("video is live.")
|
print("video is live.")
|
||||||
return []
|
return []
|
||||||
data = Downloader(video_id, duration, div, callback).download()
|
data = SuperChatMiner(video_id, duration, div, callback).extract()
|
||||||
if processor is None:
|
if processor is None:
|
||||||
return data
|
return data
|
||||||
return processor.process(
|
return processor.process(
|
||||||
@@ -3,17 +3,17 @@ from . block import Block
|
|||||||
from . patch import Patch, fill
|
from . patch import Patch, fill
|
||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
INTERVAL = 1
|
INTERVAL = 1
|
||||||
class DownloadWorker:
|
class ExtractWorker:
|
||||||
"""
|
"""
|
||||||
DownloadWorker associates a download session with a block.
|
ExtractWorker associates a download session with a block.
|
||||||
|
|
||||||
When the dlworker finishes downloading, the block
|
When the worker finishes fetching, the block
|
||||||
being downloaded is split and assigned to the free dlworker.
|
being fetched is split and assigned to the free worker.
|
||||||
|
|
||||||
Parameter
|
Parameter
|
||||||
----------
|
----------
|
||||||
fetch : func :
|
fetch : func :
|
||||||
download function of asyncdl
|
extract function of asyncdl
|
||||||
|
|
||||||
block : Block :
|
block : Block :
|
||||||
Block object that includes chat_data
|
Block object that includes chat_data
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import requests,json,datetime
|
import requests,json,datetime
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
def download(url):
|
def extract(url):
|
||||||
_session = requests.Session()
|
_session = requests.Session()
|
||||||
html = _session.get(url, headers=config.headers)
|
html = _session.get(url, headers=config.headers)
|
||||||
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from pytchat.tool.download import parser
|
from pytchat.tool.extract import parser
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from aioresponses import aioresponses
|
from aioresponses import aioresponses
|
||||||
from concurrent.futures import CancelledError
|
from concurrent.futures import CancelledError
|
||||||
from pytchat.tool.download import asyncdl
|
from pytchat.tool.extract import asyncdl
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
@@ -3,10 +3,10 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import os, sys
|
import os, sys
|
||||||
import time
|
import time
|
||||||
from pytchat.tool.download import duplcheck
|
from pytchat.tool.extract import duplcheck
|
||||||
from pytchat.tool.download import parser
|
from pytchat.tool.extract import parser
|
||||||
from pytchat.tool.download.block import Block
|
from pytchat.tool.extract.block import Block
|
||||||
from pytchat.tool.download.duplcheck import _dump
|
from pytchat.tool.extract.duplcheck import _dump
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
@@ -23,7 +23,7 @@ def test_overlap():
|
|||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/dl_duplcheck/overlap/"+filename))
|
json.loads(_open_file("tests/testdata/extarct_duplcheck/overlap/"+filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
blocks = (
|
blocks = (
|
||||||
@@ -54,7 +54,7 @@ def test_duplicate_head():
|
|||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
|
json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -103,7 +103,7 @@ def test_duplicate_tail():
|
|||||||
"""
|
"""
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/dl_duplcheck/head/"+filename))
|
json.loads(_open_file("tests/testdata/extarct_duplcheck/head/"+filename))
|
||||||
)[1]
|
)[1]
|
||||||
#chat data offsets are ignored.
|
#chat data offsets are ignored.
|
||||||
blocks = (
|
blocks = (
|
||||||
@@ -4,18 +4,18 @@ import json
|
|||||||
import os, sys
|
import os, sys
|
||||||
import time
|
import time
|
||||||
from aioresponses import aioresponses
|
from aioresponses import aioresponses
|
||||||
from pytchat.tool.download import duplcheck
|
from pytchat.tool.extract import duplcheck
|
||||||
from pytchat.tool.download import parser
|
from pytchat.tool.extract import parser
|
||||||
from pytchat.tool.download.block import Block
|
from pytchat.tool.extract.block import Block
|
||||||
from pytchat.tool.download.patch import Patch, fill, split, set_patch
|
from pytchat.tool.extract.patch import Patch, fill, split, set_patch
|
||||||
from pytchat.tool.download.duplcheck import _dump
|
from pytchat.tool.extract.duplcheck import _dump
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/dl_patch/"+filename))
|
json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ def test_split_0():
|
|||||||
|
|
||||||
~~~~~~ before ~~~~~~
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
@parent_block (# = already downloaded)
|
@parent_block (# = already fetched)
|
||||||
|
|
||||||
first last end
|
first last end
|
||||||
|########----------------------------------------|
|
|########----------------------------------------|
|
||||||
@@ -79,11 +79,11 @@ def test_split_1():
|
|||||||
"""patch.first <= parent_block.last
|
"""patch.first <= parent_block.last
|
||||||
|
|
||||||
While awaiting at run()->asyncdl._fetch()
|
While awaiting at run()->asyncdl._fetch()
|
||||||
downloading parent_block proceeds,
|
fetching parent_block proceeds,
|
||||||
and parent_block.last exceeds patch.first.
|
and parent_block.last exceeds patch.first.
|
||||||
|
|
||||||
In this case, fetched patch is all discarded,
|
In this case, fetched patch is all discarded,
|
||||||
and dlworker searches other processing block again.
|
and worker searches other processing block again.
|
||||||
|
|
||||||
~~~~~~ before ~~~~~~
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
@@ -135,7 +135,7 @@ def test_split_2():
|
|||||||
|
|
||||||
~~~~~~ before ~~~~~~
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
@parent_block (# = already downloaded)
|
@parent_block (# = already fetched)
|
||||||
first last end (before split)
|
first last end (before split)
|
||||||
|########------------------------------|
|
|########------------------------------|
|
||||||
|
|
||||||
@@ -163,7 +163,7 @@ def test_split_2():
|
|||||||
first last=end |
|
first last=end |
|
||||||
|#################|...... cut extra data.
|
|#################|...... cut extra data.
|
||||||
^
|
^
|
||||||
continuation : None (download complete)
|
continuation : None (extract complete)
|
||||||
|
|
||||||
@fetched patch
|
@fetched patch
|
||||||
|-------- patch --------|
|
|-------- patch --------|
|
||||||
@@ -188,11 +188,11 @@ def test_split_none():
|
|||||||
"""patch.last <= parent_block.last
|
"""patch.last <= parent_block.last
|
||||||
|
|
||||||
While awaiting at run()->asyncdl._fetch()
|
While awaiting at run()->asyncdl._fetch()
|
||||||
downloading parent_block proceeds,
|
fetching parent_block proceeds,
|
||||||
and parent_block.last exceeds patch.first.
|
and parent_block.last exceeds patch.first.
|
||||||
|
|
||||||
In this case, fetched patch is all discarded,
|
In this case, fetched patch is all discarded,
|
||||||
and dlworker searches other processing block again.
|
and worker searches other processing block again.
|
||||||
|
|
||||||
~~~~~~ before ~~~~~~
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
Reference in New Issue
Block a user