Fix process
@@ -1,9 +1,13 @@
 import logging # noqa
 from . import mylogger
+from base64 import a85decode as dc
 headers = {
-    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36 Edg/86.0.622.63,gzip(gfe)',
 }

+_sml = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG%G#jBOQ!ICLqcS5tQB2;gCZ)?UdXC;f$GR3)MM2<(0>O7mh!,G@+K5?SO9T@okV").decode()
+_smr = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG%G#jBOQ!iEb03+@<k(QAU-F)8U=fDGsP557S5F7CiNH7;)D3N77^*B6YU@\\?WfBr0emZX=#^").decode()
+

 def logger(module_name: str, loglevel=None):
     module_logger = mylogger.get_logger(module_name, loglevel=loglevel)

@@ -28,11 +28,12 @@ class Parser:
     def get_contents(self, jsn):
         if jsn is None:
             self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
-        if jsn['response']['responseContext'].get('errors'):
+        if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
             raise exceptions.ResponseContextError(
                 'The video_id would be wrong, or video is deleted or private.')
-        contents = jsn['response'].get('continuationContents')
-        return contents
+        contents = jsn.get('continuationContents')
+        visitor_data = jsn.get("responseContext", {}).get("visitorData")
+        return contents, visitor_data

     def parse(self, contents):
         """

@@ -85,6 +86,7 @@ class Parser:
             '''Broadcasting end or cannot fetch chat stream'''
             self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
         cont = contents['liveChatContinuation']['continuations'][0]
+
         if cont.get("liveChatReplayContinuationData"):
             # chat data exist.
             return None

@@ -97,23 +99,22 @@ class Parser:
     def _create_data(self, metadata, contents):
         actions = contents['liveChatContinuation'].get('actions')
         if self.is_replay:
-            interval = self._get_interval(actions)
-            metadata.setdefault("timeoutMs", interval)
+            last_offset_ms = self._get_lastoffset(actions)
+            metadata.setdefault("timeoutMs", 5000)
+            metadata.setdefault("last_offset_ms", last_offset_ms)
             """Archived chat has different structures than live chat,
             so make it the same format."""
             chatdata = [action["replayChatItemAction"]["actions"][0]
                         for action in actions]
         else:
-            metadata.setdefault('timeoutMs', 10000)
+            metadata.setdefault('timeoutMs', 5000)
             chatdata = actions
         return metadata, chatdata

-    def _get_interval(self, actions: list):
-        if actions is None:
-            return 0
-        start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
-        last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
-        return (last - start)
+    def _get_lastoffset(self, actions: list):
+        if actions:
+            return int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
+        return 0

     def raise_exception(self, exception):
         if self.exception_holder is None:

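Note on the Parser hunks above: get_contents now hands back a (contents, visitor_data) pair, and for replays _create_data records the last chat offset in the metadata. A minimal calling sketch under those assumptions; the caller and variable names below are hypothetical, not part of this commit:

    # Hypothetical caller; `live_parser` is a Parser instance and `jsn` a fetched JSON dict.
    contents, visitor_data = live_parser.get_contents(jsn)   # previously returned contents only
    # For replays, the metadata built by _create_data now also carries "last_offset_ms",
    # so the caller can resume from the last offset and reuse visitor_data on later requests.
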
@@ -1,5 +1,6 @@
-import datetime
-import pytz
+from datetime import datetime, timedelta, timezone
+
+TZ_UTC = timezone(timedelta(0), 'UTC')


 class BaseRenderer:

@@ -62,13 +63,13 @@ class BaseRenderer:
         if badges:
             for badge in badges:
                 author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"]
-                if author_type == '確認済み':
+                if author_type == 'VERIFIED' or author_type == '確認済み':
                     isVerified = True
-                if author_type == '所有者':
+                if author_type == 'OWNER' or author_type == '所有者':
                     isChatOwner = True
-                if 'メンバー' in author_type:
+                if 'メンバー' in author_type or 'MEMBER' in author_type:
                     isChatSponsor = True
-                if author_type == 'モデレーター':
+                if author_type == 'MODERATOR' or author_type == 'モデレーター':
                     isChatModerator = True
         return isVerified, isChatOwner, isChatSponsor, isChatModerator

@@ -76,6 +77,6 @@ class BaseRenderer:
         return self.renderer.get('id')

     def get_publishedat(self, timestamp):
-        dt = datetime.datetime.fromtimestamp(int(timestamp) / 1000000)
-        return dt.astimezone(pytz.utc).isoformat(
+        dt = datetime.fromtimestamp(int(timestamp) / 1000000)
+        return dt.astimezone(TZ_UTC).isoformat(
             timespec='milliseconds').replace('+00:00', 'Z')

@@ -1,12 +1,12 @@
 '''
-YouTubeスーパーチャットで使用される通貨の記号とレート検索用の略号の
-対応表
+Table of symbols for the currencies used in YouTube Superchat.
+
 Key:
-    YouTubeスーパーチャットで使用される通貨の記号
-    (アルファベットで終わる場合、0xA0(&npsp)が付く)
+    Currency symbols used in YouTube Super Chat
+    If it ends with an alphabet, it will be followed by 0xA0(&npsp).
 Value:
-    fxtext: 3文字の通貨略称
-    jptest: 日本語テキスト
+    fxtext: ISO 4217 currency code
+    jptest: japanese text
 '''
 symbols = {
     "$": {"fxtext": "USD", "jptext": "米・ドル"},

@@ -1,6 +1,8 @@
 import asyncio
 import httpx
 import socket
+from concurrent.futures import CancelledError
+from json import JSONDecodeError
 from . import parser
 from . block import Block
 from . worker import ExtractWorker

@@ -8,18 +10,17 @@ from . patch import Patch
 from ... import config
 from ... paramgen import arcparam
 from ... exceptions import UnknownConnectionError
-from concurrent.futures import CancelledError
-from json import JSONDecodeError
-from urllib.parse import quote
+from ... util import get_param


 headers = config.headers
-REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
-    "get_live_chat_replay?continuation="
+smr = config._smr
+
 MAX_RETRY_COUNT = 3

 # Set to avoid duplicate parameters
-param_set = set()
+aquired_params = set()
+dat = ''


 def _split(start, end, count, min_interval_sec=120):

@@ -55,28 +56,30 @@ def _split(start, end, count, min_interval_sec=120):


 def ready_blocks(video_id, duration, div, callback):
-    param_set.clear()
+    aquired_params.clear()
     if div <= 0:
         raise ValueError

     async def _get_blocks(video_id, duration, div, callback):
-        async with httpx.AsyncClient(http2=True) as session:
+        async with httpx.AsyncClient(http2=True, headers=headers) as session:
             tasks = [_create_block(session, video_id, seektime, callback)
                      for seektime in _split(-1, duration, div)]
             return await asyncio.gather(*tasks)

     async def _create_block(session, video_id, seektime, callback):
         continuation = arcparam.getparam(video_id, seektime=seektime)
-        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
         err = None
+        last_offset = 0
+        global dat
         for _ in range(MAX_RETRY_COUNT):
             try:
-                if continuation in param_set:
+                if continuation in aquired_params:
                     next_continuation, actions = None, []
                     break
-                param_set.add(continuation)
-                resp = await session.get(url, headers=headers, timeout=10)
-                next_continuation, actions = parser.parse(resp.json())
+                aquired_params.add(continuation)
+                param = get_param(continuation, replay=True, offsetms=seektime * 1000, dat=dat)
+                resp = await session.post(smr, json=param, timeout=10)
+                next_continuation, actions, last_offset, dat = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)

@@ -88,15 +91,14 @@ def ready_blocks(video_id, duration, div, callback):
                 raise UnknownConnectionError("Abort:" + str(err))

         if actions:
-            first = parser.get_offset(actions[0])
-            last = parser.get_offset(actions[-1])
+            first_offset = parser.get_offset(actions[0])
             if callback:
-                callback(actions, last - first)
+                callback(actions, last_offset - first_offset)
             return Block(
                 continuation=next_continuation,
                 chat_data=actions,
-                first=first,
-                last=last
+                first=first_offset,
+                last=last_offset
             )

         """

@@ -122,17 +124,19 @@ def fetch_patch(callback, blocks, video_id):
             tasks = [worker.run(session) for worker in workers]
             return await asyncio.gather(*tasks)

-    async def _fetch(continuation, session) -> Patch:
-        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+    async def _fetch(continuation, last_offset, session=None) -> Patch:
+        global dat
         err = None
         for _ in range(MAX_RETRY_COUNT):
             try:
-                if continuation in param_set:
+                if continuation in aquired_params:
                     continuation, actions = None, []
                     break
-                param_set.add(continuation)
-                resp = await session.get(url, headers=config.headers)
-                continuation, actions = parser.parse(resp.json())
+                aquired_params.add(continuation)
+                params = get_param(continuation, replay=True, offsetms=last_offset, dat=dat)
+                # util.save(json.dumps(params, ensure_ascii=False), "v:/~~/param_"+str(last_offset), ".json")
+                resp = await session.post(smr, json=params)
+                continuation, actions, last_offset, dat = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)

@@ -147,7 +151,7 @@ def fetch_patch(callback, blocks, video_id):
                 raise UnknownConnectionError("Abort:" + str(err))

         if actions:
-            last = parser.get_offset(actions[-1])
+            last = last_offset
             first = parser.get_offset(actions[0])
             if callback:
                 callback(actions, last - first)

@@ -19,10 +19,10 @@ def parse(jsn):
     """
     if jsn is None:
         raise ValueError("parameter JSON is None")
-    if jsn['response']['responseContext'].get('errors'):
+    if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
         raise exceptions.ResponseContextError(
             'video_id is invalid or private/deleted.')
-    contents = jsn['response'].get('continuationContents')
+    contents = jsn.get('continuationContents')
     if contents is None:
         raise exceptions.NoContents('No chat data.')

@@ -31,13 +31,15 @@ def parse(jsn):
         raise exceptions.NoContinuation('No Continuation')
     metadata = cont.get('liveChatReplayContinuationData')
     if metadata:
+        visitor_data = jsn.get("responseContext", {}).get("visitorData", '')
         continuation = metadata.get("continuation")
-        actions = contents['liveChatContinuation'].get('actions')
-        return continuation, actions
-    return None, []
+        actions: list = contents['liveChatContinuation'].get('actions')
+        last_offset: int = get_offset(actions[-1]) if actions else 0
+        return continuation, actions, last_offset, visitor_data
+    return None, [], 0, ''


-def get_offset(item):
+def get_offset(item) -> int:
     return int(item['replayChatItemAction']["videoOffsetTimeMsec"])

@@ -38,7 +38,7 @@ class ExtractWorker:
     async def run(self, session):
         while self.block.continuation:
             patch = await self.fetch(
-                self.block.continuation, session)
+                self.block.continuation, self.block.last, session)
             if patch.continuation is None:
                 """TODO : make the worker assigned to the last block
                 to work more than twice as possible.

@@ -4,9 +4,18 @@ import json
 import os
 import re
 from .. import config
+from .. exceptions import InvalidVideoIdException

 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")

+PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
+
+YT_VIDEO_ID_LENGTH = 11
+
+CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
+
+UA = config.headers["user-agent"]
+

 def extract(url):
     _session = httpx.Client(http2=True)

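For reference, CLIENT_VERSION above is derived from yesterday's date. Run on 2 November 2020, for example, the expression would evaluate as follows (a worked example, not part of the commit):

    >>> ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
    '2.20201101.01.00'
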
@@ -17,8 +26,9 @@ def extract(url):


 def save(data, filename, extention) -> str:
-    save_filename = filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
-    with open(save_filename ,mode='w', encoding='utf-8') as f:
+    save_filename = filename + "_" + \
+        (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
+    with open(save_filename, mode='w', encoding='utf-8') as f:
         f.writelines(data)
     return save_filename

@@ -39,3 +49,46 @@ def checkpath(filepath):
         body = f'{body}({str(counter)})'
     newpath = os.path.join(os.path.dirname(filepath), body + extention)
     return newpath
+
+
+def get_param(continuation, replay=False, offsetms: int = 0, dat=''):
+    if offsetms < 0:
+        offsetms = 0
+    ret = {
+        "context": {
+            "client": {
+                "visitorData": dat,
+                "userAgent": UA,
+                "clientName": "WEB",
+                "clientVersion": CLIENT_VERSION,
+            },
+
+        },
+        "continuation": continuation,
+    }
+    if replay:
+        ret.setdefault("currentPlayerState", {
+            "playerOffsetMs": str(int(offsetms))})
+    return ret
+
+
+def extract_video_id(url_or_id: str) -> str:
+    ret = ''
+    if '[' in url_or_id:
+        url_or_id = url_or_id.replace('[', '').replace(']', '')
+
+    if type(url_or_id) != str:
+        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
+    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
+        return url_or_id
+    match = re.search(PATTERN_YTURL, url_or_id)
+    if match is None:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+    try:
+        ret = match.group(4)
+    except IndexError:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+
+    if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+    return ret

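The new get_param helper builds the JSON body that the extractor now POSTs to the endpoint held in config._smr, replacing the old GET against REPLAY_URL. A minimal sketch of that request flow, written as if inside the extractor module above where headers, smr and get_param are already in scope; the continuation token and offset are placeholders:

    # Sketch only: mirrors what _create_block/_fetch do above.
    import httpx

    params = get_param("CONTINUATION_TOKEN", replay=True, offsetms=0, dat='')
    with httpx.Client(http2=True, headers=headers) as client:
        jsn = client.post(smr, json=params, timeout=10).json()
    # parser.parse(jsn) then returns (continuation, actions, last_offset, visitor_data),
    # and visitor_data is fed back in as `dat` on the next request.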