Format code

This commit is contained in:
taizan-hokuto
2020-06-04 23:10:26 +09:00
parent e6dbc8772e
commit 2474207691
50 changed files with 635 additions and 622 deletions

View File

@@ -27,4 +27,6 @@ from .api import (
SpeedCalculator,
SuperchatCalculator,
VideoInfo
)
)
# flake8: noqa

View File

@@ -14,3 +14,5 @@ from .processors.speed.calculator import SpeedCalculator
from .processors.superchat.calculator import SuperchatCalculator
from .tool.extract.extractor import Extractor
from .tool.videoinfo import VideoInfo
# flake8: noqa

View File

@@ -1,33 +1,31 @@
import argparse
import os
from pathlib import Path
from typing import List, Callable
from .arguments import Arguments
from .. exceptions import InvalidVideoIdException, NoContentsException
from .. processors.tsv_archiver import TSVArchiver
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. import __version__
'''
Most of CLI modules refer to
Most of CLI modules refer to
Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
def main():
# Arguments
# Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
help='Video IDs separated by commas without space.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
help='Video IDs separated by commas without space.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./')
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version')
help='Settings version')
Arguments(parser.parse_args().__dict__)
if Arguments().print_version:
print(f'pytchat v{__version__}')
@@ -37,24 +35,26 @@ def main():
if Arguments().video_ids:
for video_id in Arguments().video_ids:
if '[' in video_id:
video_id = video_id.replace('[','').replace(']','')
video_id = video_id.replace('[', '').replace(']', '')
try:
info = VideoInfo(video_id)
print(f"Extracting...\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
path = Path(Arguments().output+video_id+'.html')
path = Path(Arguments().output + video_id + '.html')
print(f"output path: {path.resolve()}")
Extractor(video_id,
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
callback = _disp_progress
).extract()
Extractor(video_id,
processor=HTMLArchiver(
Arguments().output + video_id + '.html'),
callback=_disp_progress
).extract()
print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContentsException) as e:
print(e)
return
parser.print_help()
def _disp_progress(a,b):
print('.',end="",flush=True)
def _disp_progress(a, b):
print('.', end="", flush=True)

View File

@@ -2,12 +2,13 @@ from typing import Optional, Dict, Union, List
from .singleton import Singleton
'''
This modules refer to
This modules refer to
Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
class Arguments(metaclass=Singleton):
"""
Arguments singleton
@@ -18,11 +19,11 @@ class Arguments(metaclass=Singleton):
OUTPUT: str = 'output'
VIDEO: str = 'video'
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
"""
Initialize arguments
:param arguments: Arguments from cli
:param arguments: Arguments from cli
(Optional to call singleton instance without parameters)
"""
@@ -35,5 +36,5 @@ class Arguments(metaclass=Singleton):
self.video_ids: List[int] = []
# Videos
if arguments[Arguments.Name.VIDEO]:
self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO].split(',')]
self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO].split(',')]

View File

@@ -1,9 +1,11 @@
'''
This modules refer to
This modules refer to
Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
class Singleton(type):
"""
Abstract class for singletons
@@ -16,4 +18,4 @@ class Singleton(type):
return cls._instances[cls]
def get_instance(cls, *args, **kwargs):
cls.__call__(*args, **kwargs)
cls.__call__(*args, **kwargs)

View File

@@ -1,11 +1,9 @@
import logging
from . import mylogger
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
def logger(module_name: str, loglevel = None):
module_logger = mylogger.get_logger(module_name, loglevel = loglevel)
def logger(module_name: str, loglevel=None):
module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
return module_logger

View File

@@ -1,31 +1,31 @@
from logging import NullHandler, getLogger, StreamHandler, FileHandler, Formatter
from logging import NullHandler, getLogger, StreamHandler, FileHandler
import logging
from datetime import datetime
def get_logger(modname,loglevel=logging.DEBUG):
def get_logger(modname, loglevel=logging.DEBUG):
logger = getLogger(modname)
if loglevel == None:
if loglevel is None:
logger.addHandler(NullHandler())
return logger
logger.setLevel(loglevel)
#create handler1 for showing info
# create handler1 for showing info
handler1 = StreamHandler()
my_formatter = MyFormatter()
my_formatter = MyFormatter()
handler1.setFormatter(my_formatter)
handler1.setLevel(loglevel)
handler1.setLevel(loglevel)
logger.addHandler(handler1)
#create handler2 for recording log file
# create handler2 for recording log file
if loglevel <= logging.DEBUG:
handler2 = FileHandler(filename="log.txt", encoding='utf-8')
handler2.setLevel(logging.ERROR)
handler2.setFormatter(my_formatter)
logger.addHandler(handler2)
return logger
class MyFormatter(logging.Formatter):
def format(self, record):
timestamp = (
@@ -35,4 +35,4 @@ class MyFormatter(logging.Formatter):
lineno = str(record.lineno).rjust(4)
message = record.getMessage()
return timestamp+'| '+module+' { '+funcname+':'+lineno+'} - '+message
return timestamp + '| ' + module + ' { ' + funcname + ':' + lineno + '} - ' + message

View File

@@ -1,5 +1,7 @@
import asyncio
class Buffer(asyncio.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
@@ -10,19 +12,20 @@ class Buffer(asyncio.Queue):
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
'''
def __init__(self,maxsize = 0):
def __init__(self, maxsize=0):
super().__init__(maxsize)
async def put(self,item):
async def put(self, item):
if item is None:
return
return
if super().full():
super().get_nowait()
await super().put(item)
def put_nowait(self,item):
def put_nowait(self, item):
if item is None:
return
return
if super().full():
super().get_nowait()
super().put_nowait(item)
@@ -32,4 +35,4 @@ class Buffer(asyncio.Queue):
ret.append(await super().get())
while not super().empty():
ret.append(super().get_nowait())
return ret
return ret

View File

@@ -169,7 +169,7 @@ class LiveChatAsync:
continuation, session, headers)
metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs']/1000
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self.video_id,
"timeout": timeout,
@@ -177,14 +177,15 @@ class LiveChatAsync:
}
time_mark = time.time()
if self._direct_mode:
processed_chat = self.processor.process([chat_component])
processed_chat = self.processor.process(
[chat_component])
if isinstance(processed_chat, tuple):
await self._callback(*processed_chat)
else:
await self._callback(processed_chat)
else:
await self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark)
diff_time = timeout - (time.time() - time_mark)
await asyncio.sleep(diff_time)
continuation = metadata.get('continuation')
except ChatParseException as e:

View File

@@ -1,6 +1,7 @@
import queue
class Buffer(queue.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
@@ -11,28 +12,29 @@ class Buffer(queue.Queue):
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
'''
def __init__(self,maxsize = 0):
def __init__(self, maxsize=0):
super().__init__(maxsize=maxsize)
def put(self,item):
def put(self, item):
if item is None:
return
return
if super().full():
super().get_nowait()
else:
super().put(item)
def put_nowait(self,item):
def put_nowait(self, item):
if item is None:
return
return
if super().full():
super().get_nowait()
else:
super().put_nowait(item)
def get(self):
ret = []
ret.append(super().get())
while not super().empty():
ret.append(super().get())
return ret
return ret

View File

@@ -156,7 +156,7 @@ class LiveChat:
continuation, session, headers)
metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs']/1000
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self.video_id,
"timeout": timeout,
@@ -172,7 +172,7 @@ class LiveChat:
self._callback(processed_chat)
else:
self._buffer.put(chat_component)
diff_time = timeout - (time.time()-time_mark)
diff_time = timeout - (time.time() - time_mark)
time.sleep(diff_time if diff_time > 0 else 0)
continuation = metadata.get('continuation')
except ChatParseException as e:

View File

@@ -4,18 +4,21 @@ class ChatParseException(Exception):
'''
pass
class NoYtinitialdataException(ChatParseException):
'''
Thrown when the video is not found.
'''
pass
class ResponseContextError(ChatParseException):
'''
Thrown when chat data is invalid.
'''
pass
class NoLivechatRendererException(ChatParseException):
'''
Thrown when livechatRenderer is missing in JSON.
@@ -29,24 +32,28 @@ class NoContentsException(ChatParseException):
'''
pass
class NoContinuationsException(ChatParseException):
'''
Thrown when continuation is missing in ContinuationContents.
'''
pass
class IllegalFunctionCall(Exception):
'''
Thrown when get () is called even though
Thrown when get () is called even though
set_callback () has been executed.
'''
pass
class InvalidVideoIdException(Exception):
'''
Thrown when the video_id is not exist (VideoInfo).
'''
pass
class UnknownConnectionError(Exception):
pass
pass

View File

@@ -32,7 +32,7 @@ def _build(video_id, seektime, topchat_only) -> str:
elif seektime == 0:
timestamp = 1
else:
timestamp = int(seektime*1000000)
timestamp = int(seektime * 1000000)
continuation = Continuation()
entity = continuation.entity
entity.header = _gen_vid(video_id)

View File

@@ -36,9 +36,10 @@ def _gen_vid_long(video_id):
]
return urllib.parse.quote(
b64enc(reduce(lambda x, y: x+y, item)).decode()
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _gen_vid(video_id):
"""generate video_id parameter.
Parameter
@@ -50,7 +51,7 @@ def _gen_vid(video_id):
bytes : base64 encoded video_id parameter.
"""
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
header_id = video_id.encode()
header_id = video_id.encode()
header_terminator = b'\x20\x01'
item = [
@@ -61,9 +62,10 @@ def _gen_vid(video_id):
]
return urllib.parse.quote(
b64enc(reduce(lambda x, y: x+y, item)).decode()
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _nval(val):
"""convert value to byte array"""
if val < 0:
@@ -84,19 +86,19 @@ def _build(video_id, seektime, topchat_only):
if seektime == 0:
times = b''
else:
times = _nval(int(seektime*1000))
times = _nval(int(seektime * 1000))
if seektime > 0:
_len_time = b'\x5A' + (len(times)+1).to_bytes(1, 'big') + b'\x10'
_len_time = b'\x5A' + (len(times) + 1).to_bytes(1, 'big') + b'\x10'
else:
_len_time = b''
header_magic = b'\xA2\x9D\xB0\xD3\x04'
sep_0 = b'\x1A'
vid = _gen_vid(video_id)
_tag = b'\x40\x01'
timestamp1 = times
sep_1 = b'\x60\x04\x72\x02\x08'
terminator = b'\x78\x01'
sep_0 = b'\x1A'
vid = _gen_vid(video_id)
_tag = b'\x40\x01'
timestamp1 = times
sep_1 = b'\x60\x04\x72\x02\x08'
terminator = b'\x78\x01'
body = [
sep_0,
@@ -110,14 +112,12 @@ def _build(video_id, seektime, topchat_only):
terminator
]
body = reduce(lambda x, y: x+y, body)
body = reduce(lambda x, y: x + y, body)
return urllib.parse.quote(
b64enc(header_magic +
_nval(len(body)) +
body
).decode()
)
b64enc(header_magic + _nval(len(body)) + body
).decode()
)
def getparam(video_id, seektime=0.0, topchat_only=False):

View File

@@ -68,12 +68,12 @@ def _build(video_id, ts1, ts2, ts3, ts4, ts5, topchat_only) -> str:
def _times(past_sec):
n = int(time.time())
_ts1 = n - random.uniform(0, 1*3)
_ts1 = n - random.uniform(0, 1 * 3)
_ts2 = n - random.uniform(0.01, 0.99)
_ts3 = n - past_sec + random.uniform(0, 1)
_ts4 = n - random.uniform(10*60, 60*60)
_ts4 = n - random.uniform(10 * 60, 60 * 60)
_ts5 = n - random.uniform(0.01, 0.99)
return list(map(lambda x: int(x*1000000), [_ts1, _ts2, _ts3, _ts4, _ts5]))
return list(map(lambda x: int(x * 1000000), [_ts1, _ts2, _ts3, _ts4, _ts5]))
def getparam(video_id, past_sec=0, topchat_only=False) -> str:

View File

@@ -22,7 +22,8 @@ class Parser:
if jsn is None:
raise ChatParseException('Called with none JSON object.')
if jsn['response']['responseContext'].get('errors'):
raise ResponseContextError('The video_id would be wrong, or video is deleted or private.')
raise ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
contents = jsn['response'].get('continuationContents')
return contents
@@ -50,17 +51,18 @@ class Parser:
cont = contents['liveChatContinuation']['continuations'][0]
if cont is None:
raise NoContinuationsException('No Continuation')
metadata = (cont.get('invalidationContinuationData') or
cont.get('timedContinuationData') or
cont.get('reloadContinuationData') or
cont.get('liveChatReplayContinuationData')
metadata = (cont.get('invalidationContinuationData')
or cont.get('timedContinuationData')
or cont.get('reloadContinuationData')
or cont.get('liveChatReplayContinuationData')
)
if metadata is None:
if cont.get("playerSeekContinuationData"):
raise ChatParseException('Finished chat data')
unknown = list(cont.keys())[0]
if unknown:
raise ChatParseException(f"Received unknown continuation type:{unknown}")
raise ChatParseException(
f"Received unknown continuation type:{unknown}")
else:
raise ChatParseException('Cannot extract continuation data')
return self._create_data(metadata, contents)

View File

@@ -3,11 +3,12 @@ class ChatProcessor:
Abstract class that processes chat data.
Receive chat data (actions) from Listener.
'''
def process(self, chat_components: list):
'''
Interface that represents processing of chat data.
Called from LiveChat object.
Called from LiveChat object.
Parameter
----------
chat_components: List[component]
@@ -20,8 +21,3 @@ class ChatProcessor:
}
'''
pass

View File

@@ -1,5 +1,6 @@
from .chat_processor import ChatProcessor
class Combinator(ChatProcessor):
'''
Combinator combines multiple chat processors.
@@ -8,11 +9,11 @@ class Combinator(ChatProcessor):
For example:
[constructor]
chat = LiveChat("video_id", processor = ( Processor1(), Processor2(), Processor3() ) )
[receive return values]
ret1, ret2, ret3 = chat.get()
The return values are tuple of processed chat data,
The return values are tuple of processed chat data,
the order of return depends on parameter order.
Parameter
@@ -34,6 +35,4 @@ class Combinator(ChatProcessor):
Tuple of chat data processed by each chat processor.
'''
return tuple(processor.process(chat_components)
for processor in self.processors)
for processor in self.processors)

View File

@@ -1,5 +1,3 @@
import datetime
import time
from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer
@@ -39,7 +37,7 @@ class CompatibleProcessor(ChatProcessor):
chat = self.parse(action)
if chat:
chatlist.append(chat)
ret["pollingIntervalMillis"] = int(timeout*1000)
ret["pollingIntervalMillis"] = int(timeout * 1000)
ret["pageInfo"] = {
"totalResults": len(chatlist),
"resultsPerPage": len(chatlist),
@@ -58,7 +56,7 @@ class CompatibleProcessor(ChatProcessor):
rd = {}
try:
renderer = self.get_renderer(item)
if renderer == None:
if renderer is None:
return None
rd["kind"] = "youtube#liveChatMessage"

View File

@@ -1,68 +1,67 @@
import datetime, pytz
import datetime
import pytz
class BaseRenderer:
def __init__(self, item, chattype):
self.renderer = list(item.values())[0]
self.chattype = chattype
def get_snippet(self):
message = self.get_message(self.renderer)
return {
"type" : self.chattype,
"liveChatId" : "",
"authorChannelId" : self.renderer.get("authorExternalChannelId"),
"publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)),
"hasDisplayContent" : True,
"displayMessage" : message,
"type": self.chattype,
"liveChatId": "",
"authorChannelId": self.renderer.get("authorExternalChannelId"),
"publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)),
"hasDisplayContent": True,
"displayMessage": message,
"textMessageDetails": {
"messageText" : message
"messageText": message
}
}
def get_authordetails(self):
authorExternalChannelId = self.renderer.get("authorExternalChannelId")
#parse subscriber type
# parse subscriber type
isVerified, isChatOwner, isChatSponsor, isChatModerator = (
self.get_badges(self.renderer)
)
return {
"channelId" : authorExternalChannelId,
"channelUrl" : "http://www.youtube.com/channel/"+authorExternalChannelId,
"displayName" : self.renderer["authorName"]["simpleText"],
"profileImageUrl" : self.renderer["authorPhoto"]["thumbnails"][1]["url"] ,
"isVerified" : isVerified,
"isChatOwner" : isChatOwner,
"isChatSponsor" : isChatSponsor,
"isChatModerator" : isChatModerator
}
return {
"channelId": authorExternalChannelId,
"channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId,
"displayName": self.renderer["authorName"]["simpleText"],
"profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"],
"isVerified": isVerified,
"isChatOwner": isChatOwner,
"isChatSponsor": isChatSponsor,
"isChatModerator": isChatModerator
}
def get_message(self,renderer):
def get_message(self, renderer):
message = ''
if renderer.get("message"):
runs=renderer["message"].get("runs")
runs = renderer["message"].get("runs")
if runs:
for r in runs:
if r:
if r.get('emoji'):
message += r['emoji'].get('shortcuts',[''])[0]
message += r['emoji'].get('shortcuts', [''])[0]
else:
message += r.get('text','')
message += r.get('text', '')
return message
def get_badges(self,renderer):
def get_badges(self, renderer):
isVerified = False
isChatOwner = False
isChatSponsor = False
isChatModerator = False
badges=renderer.get("authorBadges")
badges = renderer.get("authorBadges")
if badges:
for badge in badges:
author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"]
author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"]
if author_type == '確認済み':
isVerified = True
if author_type == '所有者':
@@ -72,12 +71,11 @@ class BaseRenderer:
if author_type == 'モデレーター':
isChatModerator = True
return isVerified, isChatOwner, isChatSponsor, isChatModerator
def get_id(self):
return self.renderer.get('id')
def get_publishedat(self,timestamp):
dt = datetime.datetime.fromtimestamp(int(timestamp)/1000000)
def get_publishedat(self, timestamp):
dt = datetime.datetime.fromtimestamp(int(timestamp) / 1000000)
return dt.astimezone(pytz.utc).isoformat(
timespec='milliseconds').replace('+00:00','Z')
timespec='milliseconds').replace('+00:00', 'Z')

View File

@@ -35,4 +35,4 @@ symbols = {
"NOK\xa0": {"fxtext": "NOK", "jptext": "ノルウェー・クローネ"},
"BAM\xa0": {"fxtext": "BAM", "jptext": "ボスニア・兌換マルカ"},
"SGD\xa0": {"fxtext": "SGD", "jptext": "シンガポール・ドル"}
}
}

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "newSponsorEvent")
@@ -8,36 +10,33 @@ class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
message = self.get_message(self.renderer)
return {
"type" : self.chattype,
"liveChatId" : "",
"authorChannelId" : self.renderer.get("authorExternalChannelId"),
"publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)),
"hasDisplayContent" : True,
"displayMessage" : message,
"type": self.chattype,
"liveChatId": "",
"authorChannelId": self.renderer.get("authorExternalChannelId"),
"publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)),
"hasDisplayContent": True,
"displayMessage": message,
}
def get_authordetails(self):
authorExternalChannelId = self.renderer.get("authorExternalChannelId")
#parse subscriber type
# parse subscriber type
isVerified, isChatOwner, _, isChatModerator = (
self.get_badges(self.renderer)
)
return {
"channelId" : authorExternalChannelId,
"channelUrl" : "http://www.youtube.com/channel/"+authorExternalChannelId,
"displayName" : self.renderer["authorName"]["simpleText"],
"profileImageUrl" : self.renderer["authorPhoto"]["thumbnails"][1]["url"] ,
"isVerified" : isVerified,
"isChatOwner" : isChatOwner,
"isChatSponsor" : True,
"isChatModerator" : isChatModerator
}
return {
"channelId": authorExternalChannelId,
"channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId,
"displayName": self.renderer["authorName"]["simpleText"],
"profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"],
"isVerified": isVerified,
"isChatOwner": isChatOwner,
"isChatSponsor": True,
"isChatModerator": isChatModerator
}
def get_message(self,renderer):
def get_message(self, renderer):
message = (renderer["eventText"]["runs"][0]["text"]
)+' / '+(renderer["detailText"]["simpleText"])
) + ' / ' + (renderer["detailText"]["simpleText"])
return message

View File

@@ -25,7 +25,7 @@ class LiveChatMembershipItemRenderer(BaseRenderer):
)
return {
"channelId": authorExternalChannelId,
"channelUrl": "http://www.youtube.com/channel/"+authorExternalChannelId,
"channelUrl": "http://www.youtube.com/channel/" + authorExternalChannelId,
"displayName": self.renderer["authorName"]["simpleText"],
"profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"],
"isVerified": isVerified,
@@ -35,6 +35,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer):
}
def get_message(self, renderer):
message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]])
message = ''.join([mes.get("text", "")
for mes in renderer["headerSubtext"]["runs"]])
return message, [message]

View File

@@ -3,6 +3,7 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superChatEvent")
@@ -10,32 +11,32 @@ class LiveChatPaidMessageRenderer(BaseRenderer):
def get_snippet(self):
authorName = self.renderer["authorName"]["simpleText"]
message = self.get_message(self.renderer)
amountDisplayString, symbol, amountMicros =(
amountDisplayString, symbol, amountMicros = (
self.get_amountdata(self.renderer)
)
return {
"type" : self.chattype,
"liveChatId" : "",
"authorChannelId" : self.renderer.get("authorExternalChannelId"),
"publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)),
"hasDisplayContent" : True,
"displayMessage" : amountDisplayString+" from "+authorName+': \"'+ message+'\"',
"superChatDetails" : {
"amountMicros" : amountMicros,
"currency" : currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol,
"amountDisplayString" : amountDisplayString,
"tier" : 0,
"backgroundColor" : self.renderer.get("bodyBackgroundColor", 0)
"type": self.chattype,
"liveChatId": "",
"authorChannelId": self.renderer.get("authorExternalChannelId"),
"publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)),
"hasDisplayContent": True,
"displayMessage": amountDisplayString + " from " + authorName + ': \"' + message + '\"',
"superChatDetails": {
"amountMicros": amountMicros,
"currency": currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol,
"amountDisplayString": amountDisplayString,
"tier": 0,
"backgroundColor": self.renderer.get("bodyBackgroundColor", 0)
}
}
def get_amountdata(self,renderer):
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
amountMicros = int(float(m.group(2).replace(',',''))*1000000)
amountMicros = int(float(m.group(2).replace(',', '')) * 1000000)
else:
symbol = ""
amountMicros = 0
return amountDisplayString, symbol, amountMicros
return amountDisplayString, symbol, amountMicros

View File

@@ -3,46 +3,45 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superStickerEvent")
def get_snippet(self):
authorName = self.renderer["authorName"]["simpleText"]
amountDisplayString, symbol, amountMicros =(
amountDisplayString, symbol, amountMicros = (
self.get_amountdata(self.renderer)
)
return {
"type" : self.chattype,
"liveChatId" : "",
"authorChannelId" : self.renderer.get("authorExternalChannelId"),
"publishedAt" : self.get_publishedat(self.renderer.get("timestampUsec",0)),
"hasDisplayContent" : True,
"displayMessage" : "Super Sticker " + amountDisplayString + " from "+authorName,
"superStickerDetails" : {
"superStickerMetaData" : {
"type": self.chattype,
"liveChatId": "",
"authorChannelId": self.renderer.get("authorExternalChannelId"),
"publishedAt": self.get_publishedat(self.renderer.get("timestampUsec", 0)),
"hasDisplayContent": True,
"displayMessage": "Super Sticker " + amountDisplayString + " from " + authorName,
"superStickerDetails": {
"superStickerMetaData": {
"stickerId": "",
"altText": "",
"language": ""
"language": ""
},
"amountMicros" : amountMicros,
"currency" : currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol,
"amountDisplayString" : amountDisplayString,
"tier" : 0,
"backgroundColor" : self.renderer.get("bodyBackgroundColor", 0)
"amountMicros": amountMicros,
"currency": currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol,
"amountDisplayString": amountDisplayString,
"tier": 0,
"backgroundColor": self.renderer.get("bodyBackgroundColor", 0)
}
}
def get_amountdata(self,renderer):
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
amountMicros = int(float(m.group(2).replace(',',''))*1000000)
amountMicros = int(float(m.group(2).replace(',', '')) * 1000000)
else:
symbol = ""
amountMicros = 0
return amountDisplayString, symbol, amountMicros

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "textMessageEvent")

View File

@@ -20,13 +20,13 @@ class Chatdata:
if self.interval == 0:
time.sleep(1)
return
time.sleep(self.interval/len(self.items))
time.sleep(self.interval / len(self.items))
async def tick_async(self):
if self.interval == 0:
await asyncio.sleep(1)
return
await asyncio.sleep(self.interval/len(self.items))
await asyncio.sleep(self.interval / len(self.items))
class DefaultProcessor(ChatProcessor):
@@ -62,7 +62,7 @@ class DefaultProcessor(ChatProcessor):
return None
try:
renderer = self._get_renderer(item)
if renderer == None:
if renderer is None:
return None
renderer.get_snippet()

View File

@@ -1,6 +1,10 @@
from datetime import datetime
class Author:
pass
class BaseRenderer:
def __init__(self, item, chattype):
self.renderer = list(item.values())[0]
@@ -10,65 +14,62 @@ class BaseRenderer:
def get_snippet(self):
self.type = self.chattype
self.id = self.renderer.get('id')
timestampUsec = int(self.renderer.get("timestampUsec",0))
self.timestamp = int(timestampUsec/1000)
timestampUsec = int(self.renderer.get("timestampUsec", 0))
self.timestamp = int(timestampUsec / 1000)
tst = self.renderer.get("timestampText")
if tst:
self.elapsedTime = tst.get("simpleText")
else:
self.elapsedTime = ""
self.datetime = self.get_datetime(timestampUsec)
self.message ,self.messageEx = self.get_message(self.renderer)
self.id = self.renderer.get('id')
self.amountValue= 0.0
self.message, self.messageEx = self.get_message(self.renderer)
self.id = self.renderer.get('id')
self.amountValue = 0.0
self.amountString = ""
self.currency= ""
self.currency = ""
self.bgColor = 0
def get_authordetails(self):
self.author.badgeUrl = ""
(self.author.isVerified,
self.author.isChatOwner,
self.author.isChatSponsor,
self.author.isChatModerator) = (
(self.author.isVerified,
self.author.isChatOwner,
self.author.isChatSponsor,
self.author.isChatModerator) = (
self.get_badges(self.renderer)
)
self.author.channelId = self.renderer.get("authorExternalChannelId")
self.author.channelUrl = "http://www.youtube.com/channel/"+self.author.channelId
self.author.name = self.renderer["authorName"]["simpleText"]
self.author.imageUrl= self.renderer["authorPhoto"]["thumbnails"][1]["url"]
self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId
self.author.name = self.renderer["authorName"]["simpleText"]
self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"]
def get_message(self,renderer):
def get_message(self, renderer):
message = ''
message_ex = []
if renderer.get("message"):
runs=renderer["message"].get("runs")
runs = renderer["message"].get("runs")
if runs:
for r in runs:
if r:
if r.get('emoji'):
message += r['emoji'].get('shortcuts',[''])[0]
message_ex.append(r['emoji']['image']['thumbnails'][1].get('url'))
message += r['emoji'].get('shortcuts', [''])[0]
message_ex.append(
r['emoji']['image']['thumbnails'][1].get('url'))
else:
message += r.get('text','')
message_ex.append(r.get('text',''))
message += r.get('text', '')
message_ex.append(r.get('text', ''))
return message, message_ex
def get_badges(self,renderer):
def get_badges(self, renderer):
self.author.type = ''
isVerified = False
isChatOwner = False
isChatSponsor = False
isChatModerator = False
badges=renderer.get("authorBadges")
badges = renderer.get("authorBadges")
if badges:
for badge in badges:
if badge["liveChatAuthorBadgeRenderer"].get("icon"):
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
self.author.type = author_type
if author_type == 'VERIFIED':
isVerified = True
@@ -81,13 +82,10 @@ class BaseRenderer:
self.author.type = 'MEMBER'
self.get_badgeurl(badge)
return isVerified, isChatOwner, isChatSponsor, isChatModerator
def get_badgeurl(self,badge):
def get_badgeurl(self, badge):
self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"]
def get_datetime(self,timestamp):
dt = datetime.fromtimestamp(timestamp/1000000)
return dt.strftime('%Y-%m-%d %H:%M:%S')
def get_datetime(self, timestamp):
dt = datetime.fromtimestamp(timestamp / 1000000)
return dt.strftime('%Y-%m-%d %H:%M:%S')

View File

@@ -35,4 +35,4 @@ symbols = {
"NOK\xa0": {"fxtext": "NOK", "jptext": "ノルウェー・クローネ"},
"BAM\xa0": {"fxtext": "BAM", "jptext": "ボスニア・兌換マルカ"},
"SGD\xa0": {"fxtext": "SGD", "jptext": "シンガポール・ドル"}
}
}

View File

@@ -1,18 +1,15 @@
from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "newSponsor")
def get_authordetails(self):
super().get_authordetails()
self.author.isChatSponsor = True
self.author.isChatSponsor = True
def get_message(self,renderer):
def get_message(self, renderer):
message = (renderer["eventText"]["runs"][0]["text"]
)+' / '+(renderer["detailText"]["simpleText"])
) + ' / ' + (renderer["detailText"]["simpleText"])
return message

View File

@@ -10,6 +10,6 @@ class LiveChatMembershipItemRenderer(BaseRenderer):
self.author.isChatSponsor = True
def get_message(self, renderer):
message = ''.join([mes.get("text", "") for mes in renderer["headerSubtext"]["runs"]])
message = ''.join([mes.get("text", "")
for mes in renderer["headerSubtext"]["runs"]])
return message, [message]

View File

@@ -3,30 +3,29 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superChat")
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount =(
amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer)
)
self.amountValue= amount
self.amountValue = amount
self.amountString = amountDisplayString
self.currency= currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
self.bgColor= self.renderer.get("bodyBackgroundColor", 0)
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("bodyBackgroundColor", 0)
def get_amountdata(self,renderer):
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
amount = float(m.group(2).replace(',',''))
amount = float(m.group(2).replace(',', ''))
else:
symbol = ""
amount = 0.0
return amountDisplayString, symbol, amount
return amountDisplayString, symbol, amount

View File

@@ -3,37 +3,31 @@ from . import currency
from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superSticker")
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount =(
amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer)
)
self.amountValue = amount
self.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("moneyChipBackgroundColor", 0)
self.sticker = "https:"+self.renderer["sticker"]["thumbnails"][0]["url"]
self.sticker = "https:" + \
self.renderer["sticker"]["thumbnails"][0]["url"]
def get_amountdata(self,renderer):
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
amount = float(m.group(2).replace(',',''))
amount = float(m.group(2).replace(',', ''))
else:
symbol = ""
amount = 0.0
return amountDisplayString, symbol, amount

View File

@@ -1,4 +1,6 @@
from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "textMessage")

View File

@@ -1,8 +1,10 @@
from .chat_processor import ChatProcessor
class DummyProcessor(ChatProcessor):
'''
Dummy processor just returns received chat_components directly.
'''
def process(self, chat_components: list):
return chat_components

View File

@@ -1,18 +1,18 @@
import csv
import os
import re
from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
,'type','authorChannel']
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime', 'elapsed', 'authorName',
'message', 'superchat', 'type', 'authorChannel']
HEADER_HTML = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
'''
class HTMLArchiver(ChatProcessor):
'''
HtmlArchiver saves chat data as HTML table format.
@@ -21,7 +21,7 @@ class HTMLArchiver(ChatProcessor):
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
with open(self.save_path, mode='a', encoding='utf-8') as f:
f.write(HEADER_HTML)
f.write('<table border="1" style="border-collapse: collapse">')
f.writelines(self._parse_html_header(fmt_headers))
@@ -34,30 +34,30 @@ class HTMLArchiver(ChatProcessor):
newpath = filepath
counter = 0
while os.path.exists(newpath):
match = re.search(PATTERN,body)
match = re.search(PATTERN, body)
if match:
counter=int(match[2])+1
counter = int(match[2]) + 1
num_with_bracket = f'({str(counter)})'
body = f'{match[1]}{num_with_bracket}'
else:
body = f'{body}({str(counter)})'
newpath = os.path.join(os.path.dirname(filepath),body+extention)
newpath = os.path.join(os.path.dirname(filepath), body + extention)
return newpath
def process(self, chat_components: list):
"""
Returns
----------
dict :
dict :
save_path : str :
Actual save path of file.
total_lines : int :
count of total lines written to the file.
"""
if chat_components is None or len (chat_components) == 0:
if chat_components is None or len(chat_components) == 0:
return
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
with open(self.save_path, mode='a', encoding='utf-8') as f:
chats = self.processor.process(chat_components).items
for c in chats:
f.writelines(
@@ -76,23 +76,22 @@ class HTMLArchiver(ChatProcessor):
Comment out below line to prevent the table
display from collapsing.
'''
#f.write('</table>')
# f.write('</table>')
def _parse_html_line(self, raw_line):
html = ''
html+=' <tr>'
html += ' <tr>'
for cell in raw_line:
html+='<td>'+cell+'</td>'
html+='</tr>\n'
html += '<td>' + cell + '</td>'
html += '</tr>\n'
return html
def _parse_html_header(self,raw_line):
def _parse_html_header(self, raw_line):
html = ''
html+='<thead>\n'
html+=' <tr>'
html += '<thead>\n'
html += ' <tr>'
for cell in raw_line:
html+='<th>'+cell+'</th>'
html+='</tr>\n'
html+='</thead>\n'
html += '<th>' + cell + '</th>'
html += '</tr>\n'
html += '</thead>\n'
return html

View File

@@ -1,10 +1,10 @@
import datetime
import json
import os
import re
from .chat_processor import ChatProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
class JsonfileArchiver(ChatProcessor):
"""
@@ -13,39 +13,44 @@ class JsonfileArchiver(ChatProcessor):
Parameter:
----------
save_path : str :
save path of file.If a file with the same name exists,
save path of file.If a file with the same name exists,
it is automatically saved under a different name
with suffix '(number)'
"""
def __init__(self,save_path):
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
self.line_counter = 0
def process(self,chat_components: list):
def process(self, chat_components: list):
"""
Returns
----------
dict :
dict :
save_path : str :
Actual save path of file.
total_lines : int :
count of total lines written to the file.
"""
if chat_components is None: return
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
if chat_components is None:
return
with open(self.save_path, mode='a', encoding='utf-8') as f:
for component in chat_components:
if component is None: continue
if component is None:
continue
chatdata = component.get('chatdata')
if chatdata is None: continue
if chatdata is None:
continue
for action in chatdata:
if action is None: continue
json_line = json.dumps(action, ensure_ascii = False)
f.writelines(json_line+'\n')
self.line_counter+=1
return { "save_path" : self.save_path,
"total_lines": self.line_counter }
if action is None:
continue
json_line = json.dumps(action, ensure_ascii=False)
f.writelines(json_line + '\n')
self.line_counter += 1
return {"save_path": self.save_path,
"total_lines": self.line_counter}
def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath))
body = splitter[0]
@@ -53,14 +58,12 @@ class JsonfileArchiver(ChatProcessor):
newpath = filepath
counter = 0
while os.path.exists(newpath):
match = re.search(PATTERN,body)
match = re.search(PATTERN, body)
if match:
counter=int(match[2])+1
counter = int(match[2]) + 1
num_with_bracket = f'({str(counter)})'
body = f'{match[1]}{num_with_bracket}'
else:
body = f'{body}({str(counter)})'
newpath = os.path.join(os.path.dirname(filepath),body+extention)
newpath = os.path.join(os.path.dirname(filepath), body + extention)
return newpath

View File

@@ -1,47 +1,49 @@
import json
import os
import traceback
import datetime
import time
from .chat_processor import ChatProcessor
##version 2
class SimpleDisplayProcessor(ChatProcessor):
def process(self, chat_components: list):
chatlist = []
timeout = 0
if chat_components is None:
return {"timeout":timeout, "chatlist":chatlist}
return {"timeout": timeout, "chatlist": chatlist}
for component in chat_components:
timeout += component.get('timeout', 0)
chatdata = component.get('chatdata')
if chatdata is None:break
for action in chatdata:
if action is None:continue
if action.get('addChatItemAction') is None:continue
if action['addChatItemAction'].get('item') is None:continue
root = action['addChatItemAction']['item'].get('liveChatTextMessageRenderer')
if chatdata is None:
break
for action in chatdata:
if action is None:
continue
if action.get('addChatItemAction') is None:
continue
if action['addChatItemAction'].get('item') is None:
continue
root = action['addChatItemAction']['item'].get(
'liveChatTextMessageRenderer')
if root:
author_name = root['authorName']['simpleText']
message = self._parse_message(root.get('message'))
purchase_amount_text = ''
else:
root = ( action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer') or
action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer') )
root = (action['addChatItemAction']['item'].get('liveChatPaidMessageRenderer')
or action['addChatItemAction']['item'].get('liveChatPaidStickerRenderer'))
if root:
author_name = root['authorName']['simpleText']
message = self._parse_message(root.get('message'))
purchase_amount_text = root['purchaseAmountText']['simpleText']
else:
continue
chatlist.append(f'[{author_name}]: {message} {purchase_amount_text}')
return {"timeout":timeout, "chatlist":chatlist}
def _parse_message(self,message):
chatlist.append(
f'[{author_name}]: {message} {purchase_amount_text}')
return {"timeout": timeout, "chatlist": chatlist}
def _parse_message(self, message):
if message is None:
return ''
if message.get('simpleText'):
@@ -51,11 +53,9 @@ class SimpleDisplayProcessor(ChatProcessor):
tmp = ''
for run in runs:
if run.get('emoji'):
tmp+=(run['emoji']['shortcuts'][0])
tmp += (run['emoji']['shortcuts'][0])
elif run.get('text'):
tmp+=(run['text'])
tmp += (run['text'])
return tmp
else:
return ''

View File

@@ -5,10 +5,12 @@ Calculate speed of chat.
"""
import time
from .. chat_processor import ChatProcessor
class RingQueue:
"""
リング型キュー
Attributes
----------
items : list
@@ -21,10 +23,10 @@ class RingQueue:
キュー内に余裕があるか。キュー内のアイテム個数が、キューの最大個数未満であればTrue。
"""
def __init__(self, capacity):
def __init__(self, capacity):
"""
コンストラクタ
Parameter
----------
capacity:このキューに格納するアイテムの最大個数。
@@ -50,17 +52,17 @@ class RingQueue:
"""
if self.mergin:
self.items.append(item)
self.last_pos = len(self.items)-1
if self.last_pos == self.capacity-1:
self.last_pos = len(self.items) - 1
if self.last_pos == self.capacity - 1:
self.mergin = False
return
self.last_pos += 1
if self.last_pos > self.capacity-1:
if self.last_pos > self.capacity - 1:
self.last_pos = 0
self.items[self.last_pos] = item
self.first_pos += 1
if self.first_pos > self.capacity-1:
if self.first_pos > self.capacity - 1:
self.first_pos = 0
def get(self):
@@ -76,11 +78,12 @@ class RingQueue:
def item_count(self):
return len(self.items)
class SpeedCalculator(ChatProcessor, RingQueue):
"""
チャットの勢いを計算する。
一定期間のチャットデータのうち、最初のチャットの投稿時刻と
最後のチャットの投稿時刻の差を、チャット数で割り返し
1分あたりの速度に換算する。
@@ -91,7 +94,7 @@ class SpeedCalculator(ChatProcessor, RingQueue):
RingQueueに格納するチャット勢い算出用データの最大数
"""
def __init__(self, capacity = 10):
def __init__(self, capacity=10):
super().__init__(capacity)
self.speed = 0
@@ -105,7 +108,6 @@ class SpeedCalculator(ChatProcessor, RingQueue):
self._put_chatdata(chatdata)
self.speed = self._calc_speed()
return self.speed
def _calc_speed(self):
"""
@@ -116,14 +118,13 @@ class SpeedCalculator(ChatProcessor, RingQueue):
---------------------------
チャット速度(1分間で換算したチャット数)
"""
try:
#キュー内の総チャット数
try:
# キュー内の総チャット数
total = sum(item['chat_count'] for item in self.items)
#キュー内の最初と最後のチャットの時間差
duration = (self.items[self.last_pos]['endtime']
- self.items[self.first_pos]['starttime'])
# キュー内の最初と最後のチャットの時間差
duration = (self.items[self.last_pos]['endtime'] - self.items[self.first_pos]['starttime'])
if duration != 0:
return int(total*60/duration)
return int(total * 60 / duration)
return 0
except IndexError:
return 0
@@ -143,61 +144,60 @@ class SpeedCalculator(ChatProcessor, RingQueue):
'''
チャットデータがない場合に空のデータをキューに投入する。
'''
timestamp_now = int(time.time())
timestamp_now = int(time.time())
self.put({
'chat_count':0,
'starttime':int(timestamp_now),
'endtime':int(timestamp_now)
'chat_count': 0,
'starttime': int(timestamp_now),
'endtime': int(timestamp_now)
})
def _get_timestamp(action :dict):
def _get_timestamp(action: dict):
"""
チャットデータから時刻データを取り出す。
"""
try:
item = action['addChatItemAction']['item']
timestamp = int(item[list(item.keys())[0]]['timestampUsec'])
except (KeyError,TypeError):
except (KeyError, TypeError):
return None
return timestamp
if actions is None or len(actions)==0:
if actions is None or len(actions) == 0:
_put_emptydata()
return
#actions内の時刻データを持つチャットデータの数
counter=0
#actions内の最初のチャットデータの時刻
starttime= None
#actions内の最後のチャットデータの時刻
endtime=None
return
# actions内の時刻データを持つチャットデータの数
counter = 0
# actions内の最初のチャットデータの時刻
starttime = None
# actions内の最後のチャットデータの時刻
endtime = None
for action in actions:
#チャットデータからtimestampUsecを読み取る
# チャットデータからtimestampUsecを読み取る
gettime = _get_timestamp(action)
#時刻のないデータだった場合は次の行のデータで読み取り試行
# 時刻のないデータだった場合は次の行のデータで読み取り試行
if gettime is None:
continue
#最初に有効な時刻を持つデータのtimestampをstarttimeに設定
# 最初に有効な時刻を持つデータのtimestampをstarttimeに設定
if starttime is None:
starttime = gettime
#最後のtimestampを設定(途中で時刻のないデータの場合もあるので上書きしていく)
# 最後のtimestampを設定(途中で時刻のないデータの場合もあるので上書きしていく)
endtime = gettime
#チャットの数をインクリメント
# チャットの数をインクリメント
counter += 1
#チャット速度用のデータをRingQueueに送る
# チャット速度用のデータをRingQueueに送る
if starttime is None or endtime is None:
_put_emptydata()
return
self.put({
'chat_count':counter,
'starttime':int(starttime/1000000),
'endtime':int(endtime/1000000)
})
return
self.put({
'chat_count': counter,
'starttime': int(starttime / 1000000),
'endtime': int(endtime / 1000000)
})

View File

@@ -15,10 +15,12 @@ items_sticker = [
'liveChatPaidStickerRenderer'
]
class SuperchatCalculator(ChatProcessor):
"""
Calculate the amount of SuperChat by currency.
"""
def __init__(self):
self.results = {}
@@ -34,22 +36,24 @@ class SuperchatCalculator(ChatProcessor):
return self.results
for component in chat_components:
chatdata = component.get('chatdata')
if chatdata is None: continue
if chatdata is None:
continue
for action in chatdata:
renderer = self._get_item(action, items_paid) or \
self._get_item(action, items_sticker)
if renderer is None: continue
self._get_item(action, items_sticker)
if renderer is None:
continue
symbol, amount = self._parse(renderer)
self.results.setdefault(symbol,0)
self.results[symbol]+=amount
self.results.setdefault(symbol, 0)
self.results[symbol] += amount
return self.results
def _parse(self, renderer):
purchase_amount_text = renderer["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(purchase_amount_text)
if m:
symbol = m.group(1)
amount = float(m.group(2).replace(',',''))
amount = float(m.group(2).replace(',', ''))
else:
symbol = ""
amount = 0.0
@@ -69,6 +73,3 @@ class SuperchatCalculator(ChatProcessor):
continue
return None
return dict_body

View File

@@ -4,9 +4,10 @@ import re
from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchatAmount'
,'authorType','authorChannel']
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime', 'elapsed', 'authorName', 'message',
'superchatAmount', 'authorType', 'authorChannel']
class TSVArchiver(ChatProcessor):
'''
@@ -16,7 +17,7 @@ class TSVArchiver(ChatProcessor):
def __init__(self, save_path):
super().__init__()
self.save_path = self._checkpath(save_path)
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
with open(self.save_path, mode='a', encoding='utf-8') as f:
writer = csv.writer(f, delimiter='\t')
writer.writerow(fmt_headers)
self.processor = DefaultProcessor()
@@ -28,30 +29,30 @@ class TSVArchiver(ChatProcessor):
newpath = filepath
counter = 0
while os.path.exists(newpath):
match = re.search(PATTERN,body)
match = re.search(PATTERN, body)
if match:
counter=int(match[2])+1
counter = int(match[2]) + 1
num_with_bracket = f'({str(counter)})'
body = f'{match[1]}{num_with_bracket}'
else:
body = f'{body}({str(counter)})'
newpath = os.path.join(os.path.dirname(filepath),body+extention)
newpath = os.path.join(os.path.dirname(filepath), body + extention)
return newpath
def process(self, chat_components: list):
"""
Returns
----------
dict :
dict :
save_path : str :
Actual save path of file.
total_lines : int :
count of total lines written to the file.
"""
if chat_components is None or len (chat_components) == 0:
if chat_components is None or len(chat_components) == 0:
return
with open(self.save_path, mode='a', encoding = 'utf-8') as f:
with open(self.save_path, mode='a', encoding='utf-8') as f:
writer = csv.writer(f, delimiter='\t')
chats = self.processor.process(chat_components).items
for c in chats:
@@ -64,7 +65,3 @@ class TSVArchiver(ChatProcessor):
c.author.type,
c.author.channelId
])

View File

@@ -5,7 +5,7 @@ from . import parser
from . block import Block
from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... import config
from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError
@@ -17,10 +17,11 @@ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
def _split(start, end, count, min_interval_sec = 120):
def _split(start, end, count, min_interval_sec=120):
"""
Split section from `start` to `end` into `count` pieces,
and returns the beginning of each piece.
and returns the beginning of each piece.
The `count` is adjusted so that the length of each piece
is no smaller than `min_interval`.
@@ -28,41 +29,43 @@ def _split(start, end, count, min_interval_sec = 120):
--------
List of the offset of each block's first chat data.
"""
if not (isinstance(start,int) or isinstance(start,float)) or \
not (isinstance(end,int) or isinstance(end,float)):
if not (isinstance(start, int) or isinstance(start, float)) or \
not (isinstance(end, int) or isinstance(end, float)):
raise ValueError("start/end must be int or float")
if not isinstance(count,int):
if not isinstance(count, int):
raise ValueError("count must be int")
if start>end:
if start > end:
raise ValueError("end must be equal to or greater than start.")
if count<1:
if count < 1:
raise ValueError("count must be equal to or greater than 1.")
if (end-start)/count < min_interval_sec:
count = int((end-start)/min_interval_sec)
if count == 0 : count = 1
interval= (end-start)/count
if (end - start) / count < min_interval_sec:
count = int((end - start) / min_interval_sec)
if count == 0:
count = 1
interval = (end - start) / count
if count == 1:
return [start]
return sorted( list(set( [int(start + interval*j)
for j in range(count) ])))
return sorted(list(set([int(start + interval * j)
for j in range(count)])))
def ready_blocks(video_id, duration, div, callback):
if div <= 0: raise ValueError
if div <= 0:
raise ValueError
async def _get_blocks( video_id, duration, div, callback):
async def _get_blocks(video_id, duration, div, callback):
async with aiohttp.ClientSession() as session:
tasks = [_create_block(session, video_id, seektime, callback)
for seektime in _split(-1, duration, div)]
tasks = [_create_block(session, video_id, seektime, callback)
for seektime in _split(-1, duration, div)]
return await asyncio.gather(*tasks)
async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime = seektime)
continuation = arcparam.getparam(video_id, seektime=seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try :
async with session.get(url, headers = headers) as resp:
try:
async with session.get(url, headers=headers) as resp:
text = await resp.text()
next_continuation, actions = parser.parse(json.loads(text))
break
@@ -76,41 +79,42 @@ def ready_blocks(video_id, duration, div, callback):
first = parser.get_offset(actions[0])
last = parser.get_offset(actions[-1])
if callback:
callback(actions,last-first)
callback(actions, last - first)
return Block(
continuation = next_continuation,
chat_data = actions,
first = first,
last = last
continuation=next_continuation,
chat_data=actions,
first=first,
last=last
)
"""
fetch initial blocks.
"""
"""
loop = asyncio.get_event_loop()
blocks = loop.run_until_complete(
_get_blocks(video_id, duration, div, callback))
return blocks
def fetch_patch(callback, blocks, video_id):
async def _allocate_workers():
workers = [
ExtractWorker(
fetch = _fetch, block = block,
blocks = blocks, video_id = video_id
fetch=_fetch, block=block,
blocks=blocks, video_id=video_id
)
for block in blocks
]
async with aiohttp.ClientSession() as session:
tasks = [worker.run(session) for worker in workers]
return await asyncio.gather(*tasks)
return await asyncio.gather(*tasks)
async def _fetch(continuation,session) -> Patch:
async def _fetch(continuation, session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
async with session.get(url,headers = config.headers) as resp:
async with session.get(url, headers=config.headers) as resp:
chat_json = await resp.text()
continuation, actions = parser.parse(json.loads(chat_json))
break
@@ -126,21 +130,22 @@ def fetch_patch(callback, blocks, video_id):
if callback:
callback(actions, last - first)
return Patch(actions, continuation, first, last)
return Patch(continuation = continuation)
return Patch(continuation=continuation)
"""
allocate workers and assign blocks.
"""
"""
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(_allocate_workers())
except CancelledError:
pass
async def _shutdown():
print("\nshutdown...")
tasks = [t for t in asyncio.all_tasks()
if t is not asyncio.current_task()]
if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
try:
@@ -148,7 +153,7 @@ async def _shutdown():
except asyncio.CancelledError:
pass
def cancel():
loop = asyncio.get_event_loop()
loop.create_task(_shutdown())

View File

@@ -1,14 +1,13 @@
from . import parser
class Block:
"""Block object represents something like a box
"""Block object represents something like a box
to join chunk of chatdata.
Parameter:
---------
first : int :
videoOffsetTimeMs of the first chat_data
videoOffsetTimeMs of the first chat_data
(chat_data[0])
last : int :
videoOffsetTimeMs of the last chat_data.
(chat_data[-1])
@@ -23,15 +22,15 @@ class Block:
continuation : str :
continuation param of last chat data.
chat_data : list
chat_data : list
done : bool :
whether this block has been fetched.
remaining : int :
remaining data to extract.
equals end - last.
is_last : bool :
whether this block is the last one in blocklist.
@@ -39,13 +38,13 @@ class Block:
whether this block is in the process of during_split.
while True, this block is excluded from duplicate split procedure.
"""
__slots__ = ['first','last','end','continuation','chat_data','remaining',
'done','is_last','during_split']
def __init__(self, first = 0, last = 0, end = 0,
continuation = '', chat_data = [], is_last = False,
during_split = False):
__slots__ = ['first', 'last', 'end', 'continuation', 'chat_data', 'remaining',
'done', 'is_last', 'during_split']
def __init__(self, first=0, last=0, end=0,
continuation='', chat_data=[], is_last=False,
during_split=False):
self.first = first
self.last = last
self.end = end

View File

@@ -1,7 +1,8 @@
from . import parser
def check_duplicate(chatdata):
max_range = len(chatdata)-1
max_range = len(chatdata) - 1
tbl_offset = [None] * max_range
tbl_id = [None] * max_range
tbl_type = [None] * max_range
@@ -9,33 +10,31 @@ def check_duplicate(chatdata):
def create_table(chatdata, max_range):
for i in range(max_range):
tbl_offset[i] = parser.get_offset(chatdata[i])
tbl_id[i] = parser.get_id(chatdata[i])
tbl_id[i] = parser.get_id(chatdata[i])
tbl_type[i] = parser.get_type(chatdata[i])
def is_duplicate(i, j):
return (
return (
tbl_offset[i] == tbl_offset[j]
and
tbl_id[i] == tbl_id[j]
and
tbl_type[i] == tbl_type[j]
and tbl_id[i] == tbl_id[j]
and tbl_type[i] == tbl_type[j]
)
print("creating table...")
create_table(chatdata,max_range)
create_table(chatdata, max_range)
print("searching duplicate data...")
return [{ "i":{
"index" : i, "id" : parser.get_id(chatdata[i]),
"offsetTime" : parser.get_offset(chatdata[i]),
"type" : parser.get_type(chatdata[i])
},
"j":{
"index" : j, "id" : parser.get_id(chatdata[j]),
"offsetTime" : parser.get_offset(chatdata[j]),
"type" : parser.get_type(chatdata[j])
}
}
for i in range(max_range) for j in range(i+1,max_range)
if is_duplicate(i,j)]
return [{"i": {
"index": i, "id": parser.get_id(chatdata[i]),
"offsetTime": parser.get_offset(chatdata[i]),
"type": parser.get_type(chatdata[i])
},
"j":{
"index": j, "id": parser.get_id(chatdata[j]),
"offsetTime": parser.get_offset(chatdata[j]),
"type": parser.get_type(chatdata[j])
}
}
for i in range(max_range) for j in range(i + 1, max_range)
if is_duplicate(i, j)]
def check_duplicate_offset(chatdata):
@@ -47,27 +46,27 @@ def check_duplicate_offset(chatdata):
def create_table(chatdata, max_range):
for i in range(max_range):
tbl_offset[i] = parser.get_offset(chatdata[i])
tbl_id[i] = parser.get_id(chatdata[i])
tbl_id[i] = parser.get_id(chatdata[i])
tbl_type[i] = parser.get_type(chatdata[i])
def is_duplicate(i, j):
return (
return (
tbl_offset[i] == tbl_offset[j]
and
tbl_id[i] == tbl_id[j]
and tbl_id[i] == tbl_id[j]
)
print("creating table...")
create_table(chatdata,max_range)
create_table(chatdata, max_range)
print("searching duplicate data...")
return [{
"index" : i, "id" : tbl_id[i],
"offsetTime" : tbl_offset[i],
"type:" : tbl_type[i]
}
for i in range(max_range-1)
if is_duplicate(i,i+1)]
"index": i, "id": tbl_id[i],
"offsetTime": tbl_offset[i],
"type:": tbl_type[i]
}
for i in range(max_range - 1)
if is_duplicate(i, i + 1)]
def remove_duplicate_head(blocks):
if len(blocks) == 0 or len(blocks) == 1:
@@ -77,64 +76,62 @@ def remove_duplicate_head(blocks):
if len(blocks[index].chat_data) == 0:
return True
elif len(blocks[index+1].chat_data) == 0:
elif len(blocks[index + 1].chat_data) == 0:
return False
id_0 = parser.get_id(blocks[index].chat_data[0])
id_1 = parser.get_id(blocks[index+1].chat_data[0])
id_1 = parser.get_id(blocks[index + 1].chat_data[0])
type_0 = parser.get_type(blocks[index].chat_data[0])
type_1 = parser.get_type(blocks[index+1].chat_data[0])
type_1 = parser.get_type(blocks[index + 1].chat_data[0])
return (
blocks[index].first == blocks[index+1].first
and
id_0 == id_1
and
type_0 == type_1
blocks[index].first == blocks[index + 1].first
and id_0 == id_1
and type_0 == type_1
)
ret = [blocks[i] for i in range(len(blocks)-1)
if (len(blocks[i].chat_data)>0 and
not is_duplicate_head(i) )]
ret = [blocks[i] for i in range(len(blocks) - 1)
if (len(blocks[i].chat_data) > 0
and not is_duplicate_head(i))]
ret.append(blocks[-1])
return ret
def remove_duplicate_tail(blocks):
if len(blocks) == 0 or len(blocks) == 1:
return blocks
return blocks
def is_duplicate_tail(index):
if len(blocks[index].chat_data) == 0:
return True
elif len(blocks[index-1].chat_data) == 0:
elif len(blocks[index - 1].chat_data) == 0:
return False
id_0 = parser.get_id(blocks[index-1].chat_data[-1])
id_0 = parser.get_id(blocks[index - 1].chat_data[-1])
id_1 = parser.get_id(blocks[index].chat_data[-1])
type_0 = parser.get_type(blocks[index-1].chat_data[-1])
type_0 = parser.get_type(blocks[index - 1].chat_data[-1])
type_1 = parser.get_type(blocks[index].chat_data[-1])
return (
blocks[index-1].last == blocks[index].last
and
id_0 == id_1
and
type_0 == type_1
blocks[index - 1].last == blocks[index].last
and id_0 == id_1
and type_0 == type_1
)
ret = [blocks[i] for i in range(0,len(blocks))
if i == 0 or not is_duplicate_tail(i) ]
ret = [blocks[i] for i in range(0, len(blocks))
if i == 0 or not is_duplicate_tail(i)]
return ret
def remove_overlap(blocks):
"""
Fix overlapped blocks after ready_blocks().
Align the last offset of each block to the first offset
Align the last offset of each block to the first offset
of next block (equals `end` offset of each block).
"""
if len(blocks) == 0 or len(blocks) == 1:
return blocks
return blocks
for block in blocks:
if block.is_last:
break
if len(block.chat_data)==0:
if len(block.chat_data) == 0:
continue
block_end = block.end
if block.last >= block_end:
@@ -143,14 +140,14 @@ def remove_overlap(blocks):
break
block.chat_data.pop()
block.last = parser.get_offset(line)
block.remaining=0
block.done=True
block.remaining = 0
block.done = True
block.continuation = None
return blocks
def _dump(blocks):
print(f"---------- first last end---")
for i,block in enumerate(blocks):
print(f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")
print("---------- first last end---")
for i, block in enumerate(blocks):
print(
f"block[{i:3}] {block.first:>10} {block.last:>10} {block.end:>10}")

View File

@@ -1,16 +1,16 @@
from . import asyncdl
from . import duplcheck
from . import parser
from . import duplcheck
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers=config.headers
headers = config.headers
class Extractor:
def __init__(self, video_id, div = 1, callback = None, processor = None):
if not isinstance(div ,int) or div < 1:
def __init__(self, video_id, div=1, callback=None, processor=None):
if not isinstance(div, int) or div < 1:
raise ValueError('div must be positive integer.')
elif div > 10:
div = 10
@@ -33,7 +33,7 @@ class Extractor:
blocks = asyncdl.ready_blocks(
self.video_id, self.duration, self.div, self.callback)
self.blocks = [block for block in blocks if block]
return self
return self
def _remove_duplicate_head(self):
self.blocks = duplcheck.remove_duplicate_head(self.blocks)
@@ -41,10 +41,10 @@ class Extractor:
def _set_block_end(self):
if len(self.blocks) > 0:
for i in range(len(self.blocks)-1):
self.blocks[i].end = self.blocks[i+1].first
self.blocks[-1].end = self.duration*1000
self.blocks[-1].is_last =True
for i in range(len(self.blocks) - 1):
self.blocks[i].end = self.blocks[i + 1].first
self.blocks[-1].end = self.duration * 1000
self.blocks[-1].is_last = True
return self
def _remove_overlap(self):
@@ -62,7 +62,7 @@ class Extractor:
def _combine(self):
ret = []
for block in self.blocks:
ret.extend(block.chat_data)
ret.extend(block.chat_data)
return ret
def _execute_extract_operations(self):
@@ -82,11 +82,12 @@ class Extractor:
return []
data = self._execute_extract_operations()
if self.processor is None:
return data
return data
return self.processor.process(
[{'video_id':None,'timeout':1,'chatdata' : (action
["replayChatItemAction"]["actions"][0] for action in data)}]
)
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
def cancel(self):
asyncdl.cancel()
asyncdl.cancel()

View File

@@ -1,12 +1,12 @@
import json
from ... import config
from ... exceptions import (
ResponseContextError,
NoContentsException,
NoContinuationsException )
from ... exceptions import (
ResponseContextError,
NoContentsException,
NoContinuationsException)
logger = config.logger(__name__)
def parse(jsn):
"""
Parse replay chat data.
@@ -20,12 +20,12 @@ def parse(jsn):
actions : list
"""
if jsn is None:
if jsn is None:
raise ValueError("parameter JSON is None")
if jsn['response']['responseContext'].get('errors'):
raise ResponseContextError(
'video_id is invalid or private/deleted.')
contents=jsn['response'].get('continuationContents')
'video_id is invalid or private/deleted.')
contents = jsn['response'].get('continuationContents')
if contents is None:
raise NoContentsException('No chat data.')
@@ -43,12 +43,12 @@ def parse(jsn):
def get_offset(item):
return int(item['replayChatItemAction']["videoOffsetTimeMsec"])
def get_id(item):
return list((list(item['replayChatItemAction']["actions"][0].values()
)[0])['item'].values())[0].get('id')
)[0])['item'].values())[0].get('id')
def get_type(item):
return list((list(item['replayChatItemAction']["actions"][0].values()
)[0])['item'].keys())[0]
)[0])['item'].keys())[0]

View File

@@ -2,17 +2,19 @@ from . import parser
from . block import Block
from typing import NamedTuple
class Patch(NamedTuple):
"""
Patch represents chunk of chat data
which is fetched by asyncdl.fetch_patch._fetch().
"""
chats : list = []
continuation : str = None
first : int = None
last : int = None
chats: list = []
continuation: str = None
first: int = None
last: int = None
def fill(block:Block, patch:Patch):
def fill(block: Block, patch: Patch):
block_end = block.end
if patch.last < block_end or block.is_last:
set_patch(block, patch)
@@ -23,32 +25,31 @@ def fill(block:Block, patch:Patch):
break
patch.chats.pop()
set_patch(block, patch._replace(
continuation = None,
last = line_offset
)
continuation=None,
last=line_offset
)
block.remaining=0
block.done=True
)
block.remaining = 0
block.done = True
def split(parent_block:Block, child_block:Block, patch:Patch):
def split(parent_block: Block, child_block: Block, patch: Patch):
parent_block.during_split = False
if patch.first <= parent_block.last:
''' When patch overlaps with parent_block,
discard this block. '''
child_block.continuation = None
''' Leave child_block.during_split == True
''' Leave child_block.during_split == True
to exclude from during_split sequence. '''
return
return
child_block.during_split = False
child_block.first = patch.first
parent_block.end = patch.first
fill(child_block, patch)
def set_patch(block:Block, patch:Patch):
def set_patch(block: Block, patch: Patch):
block.continuation = patch.continuation
block.chat_data.extend(patch.chats)
block.last = patch.last
block.remaining = block.end-block.last
block.remaining = block.end - block.last

View File

@@ -1,8 +1,8 @@
from . import parser
from . block import Block
from . patch import Patch, fill, split
from . patch import fill, split
from ... paramgen import arcparam
class ExtractWorker:
"""
ExtractWorker associates a download session with a block.
@@ -17,18 +17,18 @@ class ExtractWorker:
block : Block :
Block object that includes chat_data
blocks : list :
List of Block(s)
video_id : str :
parent_block : Block :
the block from which current block is splitted
the block from which current block is splitted
"""
__slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']
def __init__(self, fetch, block, blocks, video_id ):
def __init__(self, fetch, block, blocks, video_id):
self.block = block
self.fetch = fetch
self.blocks = blocks
@@ -47,33 +47,35 @@ class ExtractWorker:
if self.parent_block:
split(self.parent_block, self.block, patch)
self.parent_block = None
else:
else:
fill(self.block, patch)
if self.block.continuation is None:
"""finished fetching this block """
self.block.done = True
self.block = _search_new_block(self)
def _search_new_block(worker) -> Block:
index, undone_block = _get_undone_block(worker.blocks)
if undone_block is None:
return Block(continuation = None)
mean = (undone_block.last + undone_block.end)/2
continuation = arcparam.getparam(worker.video_id, seektime = mean/1000)
return Block(continuation=None)
mean = (undone_block.last + undone_block.end) / 2
continuation = arcparam.getparam(worker.video_id, seektime=mean / 1000)
worker.parent_block = undone_block
worker.parent_block.during_split = True
new_block = Block(
end = undone_block.end,
chat_data = [],
continuation = continuation,
during_split = True,
is_last = worker.parent_block.is_last)
end=undone_block.end,
chat_data=[],
continuation=continuation,
during_split=True,
is_last=worker.parent_block.is_last)
'''swap last block'''
if worker.parent_block.is_last:
worker.parent_block.is_last = False
worker.blocks.insert(index+1, new_block)
worker.blocks.insert(index + 1, new_block)
return new_block
def _get_undone_block(blocks) -> (int, Block):
min_interval_ms = 120000
max_remaining = 0

View File

@@ -1,15 +1,14 @@
import json
import json
import re
import requests
from .. import config
from .. import util
from ..exceptions import InvalidVideoIdException
from ..exceptions import InvalidVideoIdException
headers = config.headers
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")
item_channel_id =[
item_channel_id = [
"videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer",
"channelThumbnailEndpoint",
@@ -29,7 +28,7 @@ item_response = [
"embedded_player_response"
]
item_author_image =[
item_author_image = [
"videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer",
"channelThumbnail",
@@ -63,6 +62,7 @@ item_moving_thumbnail = [
"url"
]
class VideoInfo:
'''
VideoInfo object retrieves YouTube video information.
@@ -76,6 +76,7 @@ class VideoInfo:
InvalidVideoIdException :
Occurs when video_id does not exist on YouTube.
'''
def __init__(self, video_id):
self.video_id = video_id
text = self._get_page_text(video_id)
@@ -83,13 +84,13 @@ class VideoInfo:
def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}"
resp = requests.get(url, headers = headers)
resp = requests.get(url, headers=headers)
resp.raise_for_status()
return resp.text
def _parse(self, text):
result = re.search(pattern, text)
res= json.loads(result.group(1))
res = json.loads(result.group(1))
response = self._get_item(res, item_response)
if response is None:
self._check_video_is_private(res.get("args"))
@@ -98,7 +99,7 @@ class VideoInfo:
raise InvalidVideoIdException(
f"No renderer found in video_id: [{self.video_id}].")
def _check_video_is_private(self,args):
def _check_video_is_private(self, args):
if args and args.get("video_id"):
raise InvalidVideoIdException(
f"video_id [{self.video_id}] is private or deleted.")
@@ -130,8 +131,8 @@ class VideoInfo:
def get_title(self):
if self._renderer.get("title"):
return [''.join(run["text"])
for run in self._renderer["title"]["runs"]][0]
return [''.join(run["text"])
for run in self._renderer["title"]["runs"]][0]
return None
def get_channel_id(self):
@@ -141,13 +142,13 @@ class VideoInfo:
return None
def get_author_image(self):
return self._get_item(self._renderer, item_author_image)
return self._get_item(self._renderer, item_author_image)
def get_thumbnail(self):
return self._get_item(self._renderer, item_thumbnail)
def get_channel_name(self):
return self._get_item(self._renderer, item_channel_name)
def get_moving_thumbnail(self):
return self._get_item(self._renderer, item_moving_thumbnail)

View File

@@ -1,15 +1,18 @@
import requests,json,datetime
import requests
import json
import datetime
from .. import config
def extract(url):
_session = requests.Session()
html = _session.get(url, headers=config.headers)
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
)+'test.json',mode ='w',encoding='utf-8') as f:
json.dump(html.json(),f,ensure_ascii=False)
) + 'test.json', mode='w', encoding='utf-8') as f:
json.dump(html.json(), f, ensure_ascii=False)
def save(data,filename,extention):
with open(filename+"_"+(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
)+extention,mode ='w',encoding='utf-8') as f:
def save(data, filename, extention):
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
mode='w', encoding='utf-8') as f:
f.writelines(data)