Move functions
This commit is contained in:
@@ -12,7 +12,6 @@ from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchErr
|
||||
from .. processors.html_archiver import HTMLArchiver
|
||||
from .. tool.extract.extractor import Extractor
|
||||
from .. tool.videoinfo import VideoInfo
|
||||
from .. util.extract_video_id import extract_video_id
|
||||
|
||||
|
||||
class CLIExtractor:
|
||||
@@ -25,7 +24,7 @@ class CLIExtractor:
|
||||
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
||||
|
||||
try:
|
||||
video_id = extract_video_id(video_id)
|
||||
video_id = util.extract_video_id(video_id)
|
||||
separated_path = str(Path(Arguments().output)) + os.path.sep
|
||||
path = util.checkpath(separated_path + video_id + '.html')
|
||||
try:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytchat
|
||||
from ..exceptions import ChatDataFinished, NoContents
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
from ..util import extract_video_id
|
||||
|
||||
|
||||
class Echo:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .pytchat import PytchatCore
|
||||
from .. util.extract_video_id import extract_video_id
|
||||
from .. util import extract_video_id
|
||||
|
||||
|
||||
def create(video_id: str, **kwargs):
|
||||
|
||||
@@ -3,14 +3,13 @@ import json
|
||||
import signal
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
from ..parser.live import Parser
|
||||
from .. import config
|
||||
from .. import exceptions
|
||||
from ..paramgen import liveparam, arcparam
|
||||
from ..processors.default.processor import DefaultProcessor
|
||||
from ..processors.combinator import Combinator
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
from .. import util
|
||||
|
||||
headers = config.headers
|
||||
MAX_RETRY = 10
|
||||
@@ -52,8 +51,6 @@ class PytchatCore:
|
||||
Flag to stop getting chat.
|
||||
'''
|
||||
|
||||
_setup_finished = False
|
||||
|
||||
def __init__(self, video_id,
|
||||
seektime=-1,
|
||||
processor=DefaultProcessor(),
|
||||
@@ -63,7 +60,7 @@ class PytchatCore:
|
||||
hold_exception=True,
|
||||
logger=config.logger(__name__),
|
||||
):
|
||||
self._video_id = extract_video_id(video_id)
|
||||
self._video_id = util.extract_video_id(video_id)
|
||||
self.seektime = seektime
|
||||
if isinstance(processor, tuple):
|
||||
self.processor = Combinator(processor)
|
||||
@@ -78,8 +75,10 @@ class PytchatCore:
|
||||
exception_holder=self._exception_holder
|
||||
)
|
||||
self._first_fetch = True
|
||||
self._fetch_url = "live_chat/get_live_chat?continuation="
|
||||
self._fetch_url = config._sml
|
||||
self._topchat_only = topchat_only
|
||||
self._dat = ''
|
||||
self._last_offset_ms = 0
|
||||
self._logger = logger
|
||||
if interruptable:
|
||||
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
|
||||
@@ -114,6 +113,7 @@ class PytchatCore:
|
||||
"chatdata": chatdata
|
||||
}
|
||||
self.continuation = metadata.get('continuation')
|
||||
self._last_offset_ms = metadata.get('last_offset_ms', 0)
|
||||
return chat_component
|
||||
except exceptions.ChatParseException as e:
|
||||
self._logger.debug(f"[{self._video_id}]{str(e)}")
|
||||
@@ -132,39 +132,43 @@ class PytchatCore:
|
||||
'continuationContents' which includes metadata & chat data.
|
||||
'''
|
||||
livechat_json = (
|
||||
self._get_livechat_json(continuation, client, headers)
|
||||
self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
|
||||
)
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
contents, dat = self._parser.get_contents(livechat_json)
|
||||
if self._dat == '' and dat:
|
||||
self._dat = dat
|
||||
if self._first_fetch:
|
||||
if contents is None or self._is_replay:
|
||||
'''Try to fetch archive chat data.'''
|
||||
self._parser.is_replay = True
|
||||
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||
self._fetch_url = config._smr
|
||||
continuation = arcparam.getparam(
|
||||
self._video_id, self.seektime, self._topchat_only)
|
||||
livechat_json = (self._get_livechat_json(continuation, client, headers))
|
||||
livechat_json = (self._get_livechat_json(continuation, client, replay=True, offset_ms=self.seektime * 1000))
|
||||
reload_continuation = self._parser.reload_continuation(
|
||||
self._parser.get_contents(livechat_json))
|
||||
self._parser.get_contents(livechat_json)[0])
|
||||
if reload_continuation:
|
||||
livechat_json = (self._get_livechat_json(
|
||||
reload_continuation, client, headers))
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
contents, _ = self._parser.get_contents(livechat_json)
|
||||
self._is_replay = True
|
||||
self._first_fetch = False
|
||||
return contents
|
||||
|
||||
def _get_livechat_json(self, continuation, client, headers):
|
||||
def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
|
||||
'''
|
||||
Get json which includes chat data.
|
||||
'''
|
||||
continuation = urllib.parse.quote(continuation)
|
||||
livechat_json = None
|
||||
err = None
|
||||
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||
if offset_ms < 0:
|
||||
offset_ms = 0
|
||||
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
|
||||
for _ in range(MAX_RETRY + 1):
|
||||
with client:
|
||||
with httpx.Client(http2=True) as client:
|
||||
try:
|
||||
livechat_json = client.get(url, headers=headers).json()
|
||||
response = client.post(self._fetch_url, json=param)
|
||||
livechat_json = json.loads(response.text, encoding='utf-8')
|
||||
break
|
||||
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
|
||||
err = e
|
||||
|
||||
@@ -5,17 +5,16 @@ import json
|
||||
import signal
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
from asyncio import Queue
|
||||
from concurrent.futures import CancelledError
|
||||
from .buffer import Buffer
|
||||
from ..parser.live import Parser
|
||||
from .. import config
|
||||
from .. import exceptions
|
||||
from .. import util
|
||||
from ..paramgen import liveparam, arcparam
|
||||
from ..processors.default.processor import DefaultProcessor
|
||||
from ..processors.combinator import Combinator
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
MAX_RETRY = 10
|
||||
@@ -84,7 +83,7 @@ class LiveChatAsync:
|
||||
topchat_only=False,
|
||||
logger=config.logger(__name__),
|
||||
):
|
||||
self._video_id = extract_video_id(video_id)
|
||||
self._video_id = util.extract_video_id(video_id)
|
||||
self.seektime = seektime
|
||||
if isinstance(processor, tuple):
|
||||
self.processor = Combinator(processor)
|
||||
@@ -101,8 +100,10 @@ class LiveChatAsync:
|
||||
self._pauser = Queue()
|
||||
self._pauser.put_nowait(None)
|
||||
self._first_fetch = True
|
||||
self._fetch_url = "live_chat/get_live_chat?continuation="
|
||||
self._fetch_url = config._sml
|
||||
self._topchat_only = topchat_only
|
||||
self._dat = ''
|
||||
self._last_offset_ms = 0
|
||||
self._logger = logger
|
||||
self.exception = None
|
||||
LiveChatAsync._logger = logger
|
||||
@@ -160,10 +161,8 @@ class LiveChatAsync:
|
||||
async with httpx.AsyncClient(http2=True) as client:
|
||||
while(continuation and self._is_alive):
|
||||
continuation = await self._check_pause(continuation)
|
||||
contents = await self._get_contents(
|
||||
continuation, client, headers)
|
||||
contents = await self._get_contents(continuation, client, headers)
|
||||
metadata, chatdata = self._parser.parse(contents)
|
||||
|
||||
timeout = metadata['timeoutMs'] / 1000
|
||||
chat_component = {
|
||||
"video_id": self._video_id,
|
||||
@@ -183,16 +182,16 @@ class LiveChatAsync:
|
||||
diff_time = timeout - (time.time() - time_mark)
|
||||
await asyncio.sleep(diff_time)
|
||||
continuation = metadata.get('continuation')
|
||||
self._last_offset_ms = metadata.get('last_offset_ms', 0)
|
||||
except exceptions.ChatParseException as e:
|
||||
self._logger.debug(f"[{self._video_id}]{str(e)}")
|
||||
raise
|
||||
except Exception:
|
||||
self._logger.error(f"{traceback.format_exc(limit = -1)}")
|
||||
self._logger.error(f"{traceback.format_exc(limit=-1)}")
|
||||
raise
|
||||
|
||||
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
|
||||
|
||||
|
||||
async def _check_pause(self, continuation):
|
||||
if self._pauser.empty():
|
||||
'''pause'''
|
||||
@@ -215,46 +214,50 @@ class LiveChatAsync:
|
||||
-------
|
||||
'continuationContents' which includes metadata & chatdata.
|
||||
'''
|
||||
livechat_json = await self._get_livechat_json(continuation, client, headers)
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
livechat_json = await self._get_livechat_json(continuation, client, replay=self._is_replay, offset_ms=self._last_offset_ms)
|
||||
contents, dat = self._parser.get_contents(livechat_json)
|
||||
if self._dat == '' and dat:
|
||||
self._dat = dat
|
||||
if self._first_fetch:
|
||||
if contents is None or self._is_replay:
|
||||
'''Try to fetch archive chat data.'''
|
||||
self._parser.is_replay = True
|
||||
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||
self._fetch_url = config._smr
|
||||
continuation = arcparam.getparam(
|
||||
self._video_id, self.seektime, self._topchat_only)
|
||||
livechat_json = (await self._get_livechat_json(
|
||||
continuation, client, headers))
|
||||
continuation, client, replay=True, offset_ms=self.seektime * 1000))
|
||||
reload_continuation = self._parser.reload_continuation(
|
||||
self._parser.get_contents(livechat_json))
|
||||
self._parser.get_contents(livechat_json)[0])
|
||||
if reload_continuation:
|
||||
livechat_json = (await self._get_livechat_json(
|
||||
reload_continuation, client, headers))
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
contents, _ = self._parser.get_contents(livechat_json)
|
||||
self._is_replay = True
|
||||
self._first_fetch = False
|
||||
return contents
|
||||
|
||||
async def _get_livechat_json(self, continuation, client, headers):
|
||||
async def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
|
||||
'''
|
||||
Get json which includes chat data.
|
||||
'''
|
||||
continuation = urllib.parse.quote(continuation)
|
||||
# continuation = urllib.parse.quote(continuation)
|
||||
livechat_json = None
|
||||
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||
if offset_ms < 0:
|
||||
offset_ms = 0
|
||||
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
|
||||
for _ in range(MAX_RETRY + 1):
|
||||
try:
|
||||
resp = await client.get(url, headers=headers)
|
||||
resp = await client.post(self._fetch_url, json=param)
|
||||
livechat_json = resp.json()
|
||||
break
|
||||
except (json.JSONDecodeError, httpx.HTTPError):
|
||||
await asyncio.sleep(1)
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
else:
|
||||
self._logger.error(f"[{self._video_id}]"
|
||||
f"Exceeded retry count.")
|
||||
return None
|
||||
raise exceptions.RetryExceedMaxCount()
|
||||
return livechat_json
|
||||
|
||||
async def _callback_loop(self, callback):
|
||||
@@ -330,9 +333,6 @@ class LiveChatAsync:
|
||||
self.terminate()
|
||||
|
||||
def _task_finished(self):
|
||||
'''
|
||||
Terminate fetching chats.
|
||||
'''
|
||||
if self.is_alive():
|
||||
self.terminate()
|
||||
try:
|
||||
|
||||
@@ -3,7 +3,6 @@ import json
|
||||
import signal
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
from concurrent.futures import CancelledError, ThreadPoolExecutor
|
||||
from queue import Queue
|
||||
from threading import Event
|
||||
@@ -11,10 +10,10 @@ from .buffer import Buffer
|
||||
from ..parser.live import Parser
|
||||
from .. import config
|
||||
from .. import exceptions
|
||||
from .. import util
|
||||
from ..paramgen import liveparam, arcparam
|
||||
from ..processors.default.processor import DefaultProcessor
|
||||
from ..processors.combinator import Combinator
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
|
||||
headers = config.headers
|
||||
MAX_RETRY = 10
|
||||
@@ -84,7 +83,7 @@ class LiveChat:
|
||||
topchat_only=False,
|
||||
logger=config.logger(__name__)
|
||||
):
|
||||
self._video_id = extract_video_id(video_id)
|
||||
self._video_id = util.extract_video_id(video_id)
|
||||
self.seektime = seektime
|
||||
if isinstance(processor, tuple):
|
||||
self.processor = Combinator(processor)
|
||||
@@ -101,8 +100,10 @@ class LiveChat:
|
||||
self._pauser = Queue()
|
||||
self._pauser.put_nowait(None)
|
||||
self._first_fetch = True
|
||||
self._fetch_url = "live_chat/get_live_chat?continuation="
|
||||
self._fetch_url = config._sml
|
||||
self._topchat_only = topchat_only
|
||||
self._dat = ''
|
||||
self._last_offset_ms = 0
|
||||
self._event = Event()
|
||||
self._logger = logger
|
||||
self.exception = None
|
||||
@@ -176,6 +177,7 @@ class LiveChat:
|
||||
diff_time = timeout - (time.time() - time_mark)
|
||||
self._event.wait(diff_time if diff_time > 0 else 0)
|
||||
continuation = metadata.get('continuation')
|
||||
self._last_offset_ms = metadata.get('last_offset_ms', 0)
|
||||
except exceptions.ChatParseException as e:
|
||||
self._logger.debug(f"[{self._video_id}]{str(e)}")
|
||||
raise
|
||||
@@ -185,7 +187,6 @@ class LiveChat:
|
||||
|
||||
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
|
||||
|
||||
|
||||
def _check_pause(self, continuation):
|
||||
if self._pauser.empty():
|
||||
'''pause'''
|
||||
@@ -207,39 +208,42 @@ class LiveChat:
|
||||
-------
|
||||
'continuationContents' which includes metadata & chat data.
|
||||
'''
|
||||
livechat_json = (
|
||||
self._get_livechat_json(continuation, client, headers)
|
||||
)
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
livechat_json = self._get_livechat_json(continuation, client, headers)
|
||||
contents, dat = self._parser.get_contents(livechat_json)
|
||||
if self._dat == '' and dat:
|
||||
self._dat = dat
|
||||
if self._first_fetch:
|
||||
if contents is None or self._is_replay:
|
||||
'''Try to fetch archive chat data.'''
|
||||
self._parser.is_replay = True
|
||||
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||
self._fetch_url = config._smr
|
||||
continuation = arcparam.getparam(
|
||||
self._video_id, self.seektime, self._topchat_only)
|
||||
livechat_json = (self._get_livechat_json(continuation, client, headers))
|
||||
livechat_json = (self._get_livechat_json(
|
||||
continuation, client, replay=True, offset_ms=self.seektime * 1000))
|
||||
reload_continuation = self._parser.reload_continuation(
|
||||
self._parser.get_contents(livechat_json))
|
||||
self._parser.get_contents(livechat_json)[0])
|
||||
if reload_continuation:
|
||||
livechat_json = (self._get_livechat_json(
|
||||
reload_continuation, client, headers))
|
||||
contents = self._parser.get_contents(livechat_json)
|
||||
contents, _ = self._parser.get_contents(livechat_json)
|
||||
self._is_replay = True
|
||||
self._first_fetch = False
|
||||
return contents
|
||||
|
||||
def _get_livechat_json(self, continuation, client, headers):
|
||||
def _get_livechat_json(self, continuation, client, replay: bool, offset_ms: int = 0):
|
||||
'''
|
||||
Get json which includes chat data.
|
||||
'''
|
||||
continuation = urllib.parse.quote(continuation)
|
||||
# continuation = urllib.parse.quote(continuation)
|
||||
livechat_json = None
|
||||
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||
if offset_ms < 0:
|
||||
offset_ms = 0
|
||||
param = util.get_param(continuation, dat=self._dat, replay=replay, offsetms=offset_ms)
|
||||
for _ in range(MAX_RETRY + 1):
|
||||
with client:
|
||||
try:
|
||||
livechat_json = client.get(url, headers=headers).json()
|
||||
resp = client.post(self._fetch_url, json=param)
|
||||
livechat_json = resp.json()
|
||||
break
|
||||
except (json.JSONDecodeError, httpx.HTTPError):
|
||||
time.sleep(2)
|
||||
@@ -312,9 +316,6 @@ class LiveChat:
|
||||
self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
|
||||
|
||||
def terminate(self):
|
||||
'''
|
||||
Terminate fetching chats.
|
||||
'''
|
||||
if self._pauser.empty():
|
||||
self._pauser.put_nowait(None)
|
||||
self._is_alive = False
|
||||
|
||||
@@ -4,7 +4,7 @@ from . import duplcheck
|
||||
from .. videoinfo import VideoInfo
|
||||
from ... import config
|
||||
from ... exceptions import InvalidVideoIdException
|
||||
from ... util.extract_video_id import extract_video_id
|
||||
from ... import util
|
||||
|
||||
logger = config.logger(__name__)
|
||||
headers = config.headers
|
||||
@@ -16,7 +16,7 @@ class Extractor:
|
||||
raise ValueError('div must be positive integer.')
|
||||
elif div > 10:
|
||||
div = 10
|
||||
self.video_id = extract_video_id(video_id)
|
||||
self.video_id = util.extract_video_id(video_id)
|
||||
self.div = div
|
||||
self.callback = callback
|
||||
self.processor = processor
|
||||
|
||||
@@ -4,7 +4,7 @@ import re
|
||||
import time
|
||||
from .. import config
|
||||
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
|
||||
from ..util.extract_video_id import extract_video_id
|
||||
from ..util import extract_video_id
|
||||
|
||||
|
||||
headers = config.headers
|
||||
|
||||
Reference in New Issue
Block a user