Move functions

This commit is contained in:
taizan-hokouto
2020-12-05 14:39:55 +09:00
parent 4db9486853
commit bc3f16e86b
8 changed files with 80 additions and 76 deletions

View File

@@ -12,7 +12,6 @@ from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchErr
from .. processors.html_archiver import HTMLArchiver from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
class CLIExtractor: class CLIExtractor:
@@ -25,7 +24,7 @@ class CLIExtractor:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}") print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try: try:
video_id = extract_video_id(video_id) video_id = util.extract_video_id(video_id)
separated_path = str(Path(Arguments().output)) + os.path.sep separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html') path = util.checkpath(separated_path + video_id + '.html')
try: try:

View File

@@ -1,6 +1,6 @@
import pytchat import pytchat
from ..exceptions import ChatDataFinished, NoContents from ..exceptions import ChatDataFinished, NoContents
from ..util.extract_video_id import extract_video_id from ..util import extract_video_id
class Echo: class Echo:

View File

@@ -1,5 +1,5 @@
from .pytchat import PytchatCore from .pytchat import PytchatCore
from .. util.extract_video_id import extract_video_id from .. util import extract_video_id
def create(video_id: str, **kwargs): def create(video_id: str, **kwargs):

View File

@@ -3,14 +3,13 @@ import json
import signal import signal
import time import time
import traceback import traceback
import urllib.parse
from ..parser.live import Parser from ..parser.live import Parser
from .. import config from .. import config
from .. import exceptions from .. import exceptions
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id from .. import util
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
@@ -52,8 +51,6 @@ class PytchatCore:
Flag to stop getting chat. Flag to stop getting chat.
''' '''
_setup_finished = False
def __init__(self, video_id, def __init__(self, video_id,
seektime=-1, seektime=-1,
processor=DefaultProcessor(), processor=DefaultProcessor(),
@@ -63,7 +60,7 @@ class PytchatCore:
hold_exception=True, hold_exception=True,
logger=config.logger(__name__), logger=config.logger(__name__),
): ):
self._video_id = extract_video_id(video_id) self._video_id = util.extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)
@@ -78,8 +75,10 @@ class PytchatCore:
exception_holder=self._exception_holder exception_holder=self._exception_holder
) )
self._first_fetch = True self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation=" self._fetch_url = config._sml
self._topchat_only = topchat_only self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger self._logger = logger
if interruptable: if interruptable:
signal.signal(signal.SIGINT, lambda a, b: self.terminate()) signal.signal(signal.SIGINT, lambda a, b: self.terminate())
@@ -114,6 +113,7 @@ class PytchatCore:
"chatdata": chatdata "chatdata": chatdata
} }
self.continuation = metadata.get('continuation') self.continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
return chat_component return chat_component
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
@@ -132,39 +132,43 @@ class PytchatCore:
'continuationContents' which includes metadata & chat data. 'continuationContents' which includes metadata & chat data.
''' '''
livechat_json = ( livechat_json = (
self._get_livechat_json(continuation, client, headers) self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
) )
contents = self._parser.get_contents(livechat_json) contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
if self._first_fetch: if self._first_fetch:
if contents is None or self._is_replay: if contents is None or self._is_replay:
'''Try to fetch archive chat data.''' '''Try to fetch archive chat data.'''
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, headers)) livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)) self._parser.get_contents(livechat_json)[0])
if reload_continuation: if reload_continuation:
livechat_json = (self._get_livechat_json( livechat_json = (self._get_livechat_json(
reload_continuation, client, headers)) reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json) contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True self._is_replay = True
self._first_fetch = False self._first_fetch = False
return contents return contents
def _get_livechat_json(self, continuation, client, headers): def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
''' '''
Get json which includes chat data. Get json which includes chat data.
''' '''
continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
err = None err = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
with client: with httpx.Client(http2=True) as client:
try: try:
livechat_json = client.get(url, headers=headers).json() response = client.post(self._fetch_url, json=param)
livechat_json = json.loads(response.text, encoding='utf-8')
break break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e: except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e err = e

View File

@@ -5,17 +5,16 @@ import json
import signal import signal
import time import time
import traceback import traceback
import urllib.parse
from asyncio import Queue from asyncio import Queue
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from .buffer import Buffer from .buffer import Buffer
from ..parser.live import Parser from ..parser.live import Parser
from .. import config from .. import config
from .. import exceptions from .. import exceptions
from .. import util
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
@@ -84,7 +83,7 @@ class LiveChatAsync:
topchat_only=False, topchat_only=False,
logger=config.logger(__name__), logger=config.logger(__name__),
): ):
self._video_id = extract_video_id(video_id) self._video_id = util.extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)
@@ -101,8 +100,10 @@ class LiveChatAsync:
self._pauser = Queue() self._pauser = Queue()
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
self._first_fetch = True self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation=" self._fetch_url = config._sml
self._topchat_only = topchat_only self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._logger = logger self._logger = logger
self.exception = None self.exception = None
LiveChatAsync._logger = logger LiveChatAsync._logger = logger
@@ -160,10 +161,8 @@ class LiveChatAsync:
async with httpx.AsyncClient(http2=True) as client: async with httpx.AsyncClient(http2=True) as client:
while(continuation and self._is_alive): while(continuation and self._is_alive):
continuation = await self._check_pause(continuation) continuation = await self._check_pause(continuation)
contents = await self._get_contents( contents = await self._get_contents(continuation, client, headers)
continuation, client, headers)
metadata, chatdata = self._parser.parse(contents) metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs'] / 1000 timeout = metadata['timeoutMs'] / 1000
chat_component = { chat_component = {
"video_id": self._video_id, "video_id": self._video_id,
@@ -183,6 +182,7 @@ class LiveChatAsync:
diff_time = timeout - (time.time() - time_mark) diff_time = timeout - (time.time() - time_mark)
await asyncio.sleep(diff_time) await asyncio.sleep(diff_time)
continuation = metadata.get('continuation') continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
raise raise
@@ -192,7 +192,6 @@ class LiveChatAsync:
self._logger.debug(f"[{self._video_id}] finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
async def _check_pause(self, continuation): async def _check_pause(self, continuation):
if self._pauser.empty(): if self._pauser.empty():
'''pause''' '''pause'''
@@ -215,46 +214,50 @@ class LiveChatAsync:
------- -------
'continuationContents' which includes metadata & chatdata. 'continuationContents' which includes metadata & chatdata.
''' '''
livechat_json = await self._get_livechat_json(continuation, client, headers) livechat_json = await self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
contents = self._parser.get_contents(livechat_json) contents, dat = self._parser.get_contents(livechat_json)
if self._dat == '' and dat:
self._dat = dat
if self._first_fetch: if self._first_fetch:
if contents is None or self._is_replay: if contents is None or self._is_replay:
'''Try to fetch archive chat data.''' '''Try to fetch archive chat data.'''
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only)
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
continuation, client, headers)) continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)) self._parser.get_contents(livechat_json)[0])
if reload_continuation: if reload_continuation:
livechat_json = (await self._get_livechat_json( livechat_json = (await self._get_livechat_json(
reload_continuation, client, headers)) reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json) contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True self._is_replay = True
self._first_fetch = False self._first_fetch = False
return contents return contents
async def _get_livechat_json(self, continuation, client, headers): async def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
''' '''
Get json which includes chat data. Get json which includes chat data.
''' '''
continuation = urllib.parse.quote(continuation) # continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
try: try:
resp = await client.get(url, headers=headers) resp = await client.post(self._fetch_url, json=param)
livechat_json = resp.json() livechat_json = resp.json()
break break
except (json.JSONDecodeError, httpx.HTTPError): except (json.JSONDecodeError, httpx.HTTPError):
await asyncio.sleep(1) await asyncio.sleep(2)
continue continue
else: else:
self._logger.error(f"[{self._video_id}]" self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count.") f"Exceeded retry count.")
return None raise exceptions.RetryExceedMaxCount()
return livechat_json return livechat_json
async def _callback_loop(self, callback): async def _callback_loop(self, callback):
@@ -330,9 +333,6 @@ class LiveChatAsync:
self.terminate() self.terminate()
def _task_finished(self): def _task_finished(self):
'''
Terminate fetching chats.
'''
if self.is_alive(): if self.is_alive():
self.terminate() self.terminate()
try: try:

View File

@@ -3,7 +3,6 @@ import json
import signal import signal
import time import time
import traceback import traceback
import urllib.parse
from concurrent.futures import CancelledError, ThreadPoolExecutor from concurrent.futures import CancelledError, ThreadPoolExecutor
from queue import Queue from queue import Queue
from threading import Event from threading import Event
@@ -11,10 +10,10 @@ from .buffer import Buffer
from ..parser.live import Parser from ..parser.live import Parser
from .. import config from .. import config
from .. import exceptions from .. import exceptions
from .. import util
from ..paramgen import liveparam, arcparam from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
MAX_RETRY = 10 MAX_RETRY = 10
@@ -84,7 +83,7 @@ class LiveChat:
topchat_only=False, topchat_only=False,
logger=config.logger(__name__) logger=config.logger(__name__)
): ):
self._video_id = extract_video_id(video_id) self._video_id = util.extract_video_id(video_id)
self.seektime = seektime self.seektime = seektime
if isinstance(processor, tuple): if isinstance(processor, tuple):
self.processor = Combinator(processor) self.processor = Combinator(processor)
@@ -101,8 +100,10 @@ class LiveChat:
self._pauser = Queue() self._pauser = Queue()
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
self._first_fetch = True self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation=" self._fetch_url = config._sml
self._topchat_only = topchat_only self._topchat_only = topchat_only
self._dat = ''
self._last_offset_ms = 0
self._event = Event() self._event = Event()
self._logger = logger self._logger = logger
self.exception = None self.exception = None
@@ -176,6 +177,7 @@ class LiveChat:
diff_time = timeout - (time.time() - time_mark) diff_time = timeout - (time.time() - time_mark)
self._event.wait(diff_time if diff_time > 0 else 0) self._event.wait(diff_time if diff_time > 0 else 0)
continuation = metadata.get('continuation') continuation = metadata.get('continuation')
self._last_offset_ms = metadata.get('last_offset_ms', 0)
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
raise raise
@@ -185,7 +187,6 @@ class LiveChat:
self._logger.debug(f"[{self._video_id}] finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
def _check_pause(self, continuation): def _check_pause(self, continuation):
if self._pauser.empty(): if self._pauser.empty():
'''pause''' '''pause'''
@@ -207,39 +208,42 @@ class LiveChat:
------- -------
'continuationContents' which includes metadata & chat data. 'continuationContents' which includes metadata & chat data.
''' '''
livechat_json = ( livechat_json = self._get_livechat_json(continuation, client, headers)
self._get_livechat_json(continuation, client, headers) contents, dat = self._parser.get_contents(livechat_json)
) if self._dat == '' and dat:
contents = self._parser.get_contents(livechat_json) self._dat = dat
if self._first_fetch: if self._first_fetch:
if contents is None or self._is_replay: if contents is None or self._is_replay:
'''Try to fetch archive chat data.''' '''Try to fetch archive chat data.'''
self._parser.is_replay = True self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation=" self._fetch_url = config._smr
continuation = arcparam.getparam( continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only) self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, headers)) livechat_json = (self._get_livechat_json(
continuation, client, replay=True, offset_ms=self.seektime * 1000))
reload_continuation = self._parser.reload_continuation( reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json)) self._parser.get_contents(livechat_json)[0])
if reload_continuation: if reload_continuation:
livechat_json = (self._get_livechat_json( livechat_json = (self._get_livechat_json(
reload_continuation, client, headers)) reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json) contents, _ = self._parser.get_contents(livechat_json)
self._is_replay = True self._is_replay = True
self._first_fetch = False self._first_fetch = False
return contents return contents
def _get_livechat_json(self, continuation, client, headers): def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
''' '''
Get json which includes chat data. Get json which includes chat data.
''' '''
continuation = urllib.parse.quote(continuation) # continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" if offset_ms < 0:
offset_ms = 0
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
with client:
try: try:
livechat_json = client.get(url, headers=headers).json() resp = client.post(self._fetch_url, json=param)
livechat_json = resp.json()
break break
except (json.JSONDecodeError, httpx.HTTPError): except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2) time.sleep(2)
@@ -312,9 +316,6 @@ class LiveChat:
self._logger.debug(f'[{self._video_id}] cancelled:{sender}') self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self): def terminate(self):
'''
Terminate fetching chats.
'''
if self._pauser.empty(): if self._pauser.empty():
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
self._is_alive = False self._is_alive = False

View File

@@ -4,7 +4,7 @@ from . import duplcheck
from .. videoinfo import VideoInfo from .. videoinfo import VideoInfo
from ... import config from ... import config
from ... exceptions import InvalidVideoIdException from ... exceptions import InvalidVideoIdException
from ... util.extract_video_id import extract_video_id from ... import util
logger = config.logger(__name__) logger = config.logger(__name__)
headers = config.headers headers = config.headers
@@ -16,7 +16,7 @@ class Extractor:
raise ValueError('div must be positive integer.') raise ValueError('div must be positive integer.')
elif div > 10: elif div > 10:
div = 10 div = 10
self.video_id = extract_video_id(video_id) self.video_id = util.extract_video_id(video_id)
self.div = div self.div = div
self.callback = callback self.callback = callback
self.processor = processor self.processor = processor

View File

@@ -4,7 +4,7 @@ import re
import time import time
from .. import config from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id from ..util import extract_video_id
headers = config.headers headers = config.headers