Move functions

This commit is contained in:
taizan-hokouto
2020-12-05 14:39:55 +09:00
parent 4db9486853
commit bc3f16e86b
8 changed files with 80 additions and 76 deletions

View File

@@ -12,7 +12,6 @@ from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchErr
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
class CLIExtractor:
@@ -25,7 +24,7 @@ class CLIExtractor:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try:
video_id = extract_video_id(video_id)
video_id = util.extract_video_id(video_id)
separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html')
try:
@@ -118,4 +117,4 @@ def clear_tasks():
except Exception as e:
print(str(e))
if Arguments().debug:
traceback.print_exc()
traceback.print_exc()

View File

@@ -1,6 +1,6 @@
import pytchat
from ..exceptions import ChatDataFinished, NoContents
from ..util.extract_video_id import extract_video_id
from ..util import extract_video_id
class Echo:

View File

@@ -1,5 +1,5 @@
from .pytchat import PytchatCore
from .. util.extract_video_id import extract_video_id
from .. util import extract_video_id
def create(video_id: str, **kwargs):

View File

@@ -3,14 +3,13 @@ import json
import signal
import time
import traceback
import urllib.parse
from ..parser.live import Parser
from .. import config
from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
from .. import util
headers = config.headers
MAX_RETRY = 10
@@ -52,8 +51,6 @@ class PytchatCore:
Flag to stop getting chat.
'''
_setup_finished = False
def __init__(self, video_id,
seektime=-1,
processor=DefaultProcessor(),
@@ -63,7 +60,7 @@ class PytchatCore:
hold_exception=True,
logger=config.logger(__name__),
):
self._video_id = extract_video_id(video_id)
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)
@@ -78,8 +75,10 @@ class PytchatCore:
exception_holder=self._exception_holder
)
self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation="
self._fetch_url = config._sml
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger
if interruptable:
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
@@ -91,7 +90,7 @@ class PytchatCore:
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, 3)
def _get_chat_component(self):
''' Fetch chat data and store them into buffer,
@@ -114,6 +113,7 @@ class PytchatCore:
"chatdata": chatdata
}
self.continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
return chat_component
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
@@ -132,39 +132,43 @@ class PytchatCore:
'continuationContents' which includes metadata & chat data.
'''
livechat_json = (
self._get_livechat_json(continuation, client, headers)
self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
)
contents = self._parser.get_contents(livechat_json)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
if self._first_fetch:
if contents is None or self._is_replay:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, headers))
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json))
self._parser.get_contents(livechat_json)[0])
if reload_continuation:
livechat_json = (self._get_livechat_json(
reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json)
contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True
self._first_fetch = False
return contents
def _get_livechat_json(self, continuation, client, headers):
def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
'''
Get json which includes chat data.
'''
continuation = urllib.parse.quote(continuation)
livechat_json = None
err = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
with client:
with httpx.Client(http2=True) as client:
try:
livechat_json = client.get(url, headers=headers).json()
response = client.post(self._fetch_url, json=param)
livechat_json = json.loads(response.text, encoding='utf-8')
break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e

View File

@@ -5,17 +5,16 @@ import json
import signal
import time
import traceback
import urllib.parse
from asyncio import Queue
from concurrent.futures import CancelledError
from .buffer import Buffer
from ..parser.live import Parser
from .. import config
from .. import exceptions
from .. import util
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
@@ -84,7 +83,7 @@ class LiveChatAsync:
topchat_only=False,
logger=config.logger(__name__),
):
self._video_id = extract_video_id(video_id)
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)
@@ -101,8 +100,10 @@ class LiveChatAsync:
self._pauser = Queue()
self._pauser.put_nowait(None)
self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation="
self._fetch_url = config._sml
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger
self.exception = None
LiveChatAsync._logger = logger
@@ -160,10 +161,8 @@ class LiveChatAsync:
async with httpx.AsyncClient(http2=True) as client:
while(continuation and self._is_alive):
continuation = await self._check_pause(continuation)
contents = await self._get_contents(
continuation, client, headers)
contents = await self._get_contents(continuation, client, headers)
metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self._video_id,
@@ -183,16 +182,16 @@ class LiveChatAsync:
diff_time = timeout - (time.time() - time_mark)
await asyncio.sleep(diff_time)
continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
raise
except Exception:
self._logger.error(f"{traceback.format_exc(limit = -1)}")
self._logger.error(f"{traceback.format_exc(limit=-1)}")
raise
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
async def _check_pause(self, continuation):
if self._pauser.empty():
'''pause'''
@@ -215,46 +214,50 @@ class LiveChatAsync:
-------
'continuationContents' which includes metadata & chatdata.
'''
livechat_json = await self._get_livechat_json(continuation, client, headers)
contents = self._parser.get_contents(livechat_json)
livechat_json = await self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
if self._first_fetch:
if contents is None or self._is_replay:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (await self._get_livechat_json(
continuation, client, headers))
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json))
self._parser.get_contents(livechat_json)[0])
if reload_continuation:
livechat_json = (await self._get_livechat_json(
reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json)
contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True
self._first_fetch = False
return contents
async def _get_livechat_json(self, continuation, client, headers):
async def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
'''
Get json which includes chat data.
'''
continuation = urllib.parse.quote(continuation)
# continuation = urllib.parse.quote(continuation)
livechat_json = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
try:
resp = await client.get(url, headers=headers)
resp = await client.post(self._fetch_url, json=param)
livechat_json = resp.json()
break
except (json.JSONDecodeError, httpx.HTTPError):
await asyncio.sleep(1)
await asyncio.sleep(2)
continue
else:
self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count.")
return None
raise exceptions.RetryExceedMaxCount()
return livechat_json
async def _callback_loop(self, callback):
@@ -330,9 +333,6 @@ class LiveChatAsync:
self.terminate()
def _task_finished(self):
'''
Terminate fetching chats.
'''
if self.is_alive():
self.terminate()
try:

View File

@@ -3,7 +3,6 @@ import json
import signal
import time
import traceback
import urllib.parse
from concurrent.futures import CancelledError, ThreadPoolExecutor
from queue import Queue
from threading import Event
@@ -11,10 +10,10 @@ from .buffer import Buffer
from ..parser.live import Parser
from .. import config
from .. import exceptions
from .. import util
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
@@ -84,7 +83,7 @@ class LiveChat:
topchat_only=False,
logger=config.logger(__name__)
):
self._video_id = extract_video_id(video_id)
self._video_id = util.extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)
@@ -101,8 +100,10 @@ class LiveChat:
self._pauser = Queue()
self._pauser.put_nowait(None)
self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation="
self._fetch_url = config._sml
self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._event = Event()
self._logger = logger
self.exception = None
@@ -176,6 +177,7 @@ class LiveChat:
diff_time = timeout - (time.time() - time_mark)
self._event.wait(diff_time if diff_time > 0 else 0)
continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
raise
@@ -185,7 +187,6 @@ class LiveChat:
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
def _check_pause(self, continuation):
if self._pauser.empty():
'''pause'''
@@ -207,43 +208,46 @@ class LiveChat:
-------
'continuationContents' which includes metadata & chat data.
'''
livechat_json = (
self._get_livechat_json(continuation, client, headers)
)
contents = self._parser.get_contents(livechat_json)
livechat_json = self._get_livechat_json(continuation, client, headers)
contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
if self._first_fetch:
if contents is None or self._is_replay:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
self._fetch_url = config._smr
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, headers))
livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json))
self._parser.get_contents(livechat_json)[0])
if reload_continuation:
livechat_json = (self._get_livechat_json(
reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json)
contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True
self._first_fetch = False
return contents
def _get_livechat_json(self, continuation, client, headers):
def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
'''
Get json which includes chat data.
'''
continuation = urllib.parse.quote(continuation)
# continuation = urllib.parse.quote(continuation)
livechat_json = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1):
with client:
try:
livechat_json = client.get(url, headers=headers).json()
break
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)
continue
try:
resp = client.post(self._fetch_url, json=param)
livechat_json = resp.json()
break
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)
continue
else:
self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count.")
@@ -312,9 +316,6 @@ class LiveChat:
self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self):
'''
Terminate fetching chats.
'''
if self._pauser.empty():
self._pauser.put_nowait(None)
self._is_alive = False

View File

@@ -4,7 +4,7 @@ from . import duplcheck
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException
from ... util.extract_video_id import extract_video_id
from ... import util
logger = config.logger(__name__)
headers = config.headers
@@ -16,7 +16,7 @@ class Extractor:
raise ValueError('div must be positive integer.')
elif div > 10:
div = 10
self.video_id = extract_video_id(video_id)
self.video_id = util.extract_video_id(video_id)
self.div = div
self.callback = callback
self.processor = processor

View File

@@ -4,7 +4,7 @@ import re
import time
from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id
from ..util import extract_video_id
headers = config.headers