Merge branch 'release/v0.4.0'

This commit is contained in:
taizan-hokouto
2020-11-03 18:20:09 +09:00
32 changed files with 776 additions and 553 deletions

150
README.md
View File

@@ -5,9 +5,7 @@ pytchat is a python library for fetching youtube live chat.
## Description ## Description
pytchat is a python library for fetching youtube live chat pytchat is a python library for fetching youtube live chat
without using youtube api, Selenium or BeautifulSoup. without using Selenium or BeautifulSoup.
pytchatは、YouTubeチャットを閲覧するためのpythonライブラリです。
Other features: Other features:
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one. + Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
@@ -16,7 +14,7 @@ Other features:
instead of web scraping. instead of web scraping.
For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki). <br> For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki). <br>
より詳細な解説は[wiki](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)を参照してください。 [wiki (Japanese)](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)
## Install ## Install
```python ```python
@@ -27,128 +25,55 @@ pip install pytchat
### CLI ### CLI
One-liner command. One-liner command.
Save chat data to html, with embedded custom emojis.
Save chat data to html with embedded custom emojis.
Show chat stream (--echo option).
```bash ```bash
$ pytchat -v https://www.youtube.com/watch?v=ZJ6Q4U_Vg6s -o "c:/temp/" $ pytchat -v https://www.youtube.com/watch?v=uIx8l2xlYVY -o "c:/temp/"
# options: # options:
# -v : Video ID or URL that includes ID # -v : Video ID or URL that includes ID
# -o : output directory (default path: './') # -o : output directory (default path: './')
# --echo : Show chats.
# saved filename is [video_id].html # saved filename is [video_id].html
``` ```
### on-demand mode ### On-demand mode with simple non-buffered object.
```python ```python
from pytchat import LiveChat import pytchat
livechat = LiveChat(video_id = "Zvp1pJpie4I") chat = pytchat.create(video_id="uIx8l2xlYVY")
# It is also possible to specify a URL that includes the video ID:
# livechat = LiveChat("https://www.youtube.com/watch?v=Zvp1pJpie4I")
while livechat.is_alive():
try:
chatdata = livechat.get()
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]- {c.message}")
chatdata.tick()
except KeyboardInterrupt:
livechat.terminate()
break
```
### callback mode
```python
from pytchat import LiveChat
import time
def main():
livechat = LiveChat(video_id = "Zvp1pJpie4I", callback = disp)
while livechat.is_alive():
#other background operation.
time.sleep(1)
livechat.terminate()
#callback function (automatically called)
def disp(chatdata):
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]- {c.message}")
chatdata.tick()
if __name__ == '__main__':
main()
```
### asyncio context:
```python
from pytchat import LiveChatAsync
from concurrent.futures import CancelledError
import asyncio
async def main():
livechat = LiveChatAsync("Zvp1pJpie4I", callback = func)
while livechat.is_alive():
#other background operation.
await asyncio.sleep(3)
#callback function is automatically called.
async def func(chatdata):
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
await chatdata.tick_async()
if __name__ == '__main__':
try:
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
except CancelledError:
pass
```
### youtube api compatible processor:
```python
from pytchat import LiveChat, CompatibleProcessor
import time
chat = LiveChat("Zvp1pJpie4I",
processor = CompatibleProcessor() )
while chat.is_alive(): while chat.is_alive():
try: for c in chat.get().sync_items():
data = chat.get() print(f"{c.datetime} [{c.author.name}]- {c.message}")
polling = data['pollingIntervalMillis']/1000
for c in data['items']:
if c.get('snippet'):
print(f"[{c['authorDetails']['displayName']}]"
f"-{c['snippet']['displayMessage']}")
time.sleep(polling/len(data['items']))
except KeyboardInterrupt:
chat.terminate()
``` ```
### replay:
If specified video is not live,
automatically try to fetch archived chat data.
### Output JSON format (feature of [DefaultProcessor](DefaultProcessor))
```python ```python
from pytchat import LiveChat import pytchat
import time
def main(): chat = pytchat.create(video_id="uIx8l2xlYVY")
#seektime (seconds): start position of chat. while chat.is_alive():
chat = LiveChat("ojes5ULOqhc", seektime = 60*30) print(chat.get().json())
print('Replay from 30:00') time.sleep(5)
try: '''
while chat.is_alive(): # Each chat item can also be output in JSON format.
data = chat.get() for c in chat.get().sync_items():
for c in data.items: print(c.json())
print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}") '''
data.tick()
except KeyboardInterrupt:
chat.terminate()
if __name__ == '__main__':
main()
``` ```
### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
### other
#### Fetch chat with buffer.
[LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat)
#### Asyncio Context
[LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync)
#### [YT API compatible chat processor]https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor)
### [Extract archived chat data](https://github.com/taizan-hokuto/pytchat/wiki/Extractor)
```python ```python
from pytchat import HTMLArchiver, Extractor from pytchat import HTMLArchiver, Extractor
@@ -164,7 +89,7 @@ print("finished.")
``` ```
## Structure of Default Processor ## Structure of Default Processor
Each item can be got with `items` function. Each item can be got with `sync_items()` function.
<table> <table>
<tr> <tr>
<th>name</th> <th>name</th>
@@ -298,6 +223,9 @@ Most of source code of CLI refer to:
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader) [PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
Progress bar in CLI is based on:
[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3)
## Author ## Author

View File

@@ -2,13 +2,28 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.3.2' __version__ = '0.4.0'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
__url__ = 'https://github.com/taizan-hokuto/pytchat' __url__ = 'https://github.com/taizan-hokuto/pytchat'
__all__ = ["core_async","core_multithread","processors"]
from .exceptions import (
ChatParseException,
ResponseContextError,
NoContents,
NoContinuation,
IllegalFunctionCall,
InvalidVideoIdException,
UnknownConnectionError,
RetryExceedMaxCount,
ChatDataFinished,
ReceivedUnknownContinuation,
FailedExtractContinuation,
VideoInfoParseError,
PatternUnmatchError
)
from .api import ( from .api import (
cli, cli,
@@ -26,7 +41,7 @@ from .api import (
SimpleDisplayProcessor, SimpleDisplayProcessor,
SpeedCalculator, SpeedCalculator,
SuperchatCalculator, SuperchatCalculator,
VideoInfo VideoInfo,
create
) )
# flake8: noqa # flake8: noqa

View File

@@ -1,5 +1,6 @@
from . import cli from . import cli
from . import config from . import config
from .core import create
from .core_multithread.livechat import LiveChat from .core_multithread.livechat import LiveChat
from .core_async.livechat import LiveChatAsync from .core_async.livechat import LiveChatAsync
from .processors.chat_processor import ChatProcessor from .processors.chat_processor import ChatProcessor
@@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator
from .tool.extract.extractor import Extractor from .tool.extract.extractor import Extractor
from .tool.videoinfo import VideoInfo from .tool.videoinfo import VideoInfo
__all__ = [
cli,
config,
LiveChat,
LiveChatAsync,
ChatProcessor,
CompatibleProcessor,
DummyProcessor,
DefaultProcessor,
Extractor,
HTMLArchiver,
TSVArchiver,
JsonfileArchiver,
SimpleDisplayProcessor,
SpeedCalculator,
SuperchatCalculator,
VideoInfo,
create
]
# flake8: noqa # flake8: noqa

View File

@@ -10,6 +10,7 @@ from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
from .arguments import Arguments from .arguments import Arguments
from .echo import Echo
from .progressbar import ProgressBar from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
from .. processors.html_archiver import HTMLArchiver from .. processors.html_archiver import HTMLArchiver
@@ -41,11 +42,13 @@ def main():
help='Save error data when error occurs(".dat" file)') help='Save error data when error occurs(".dat" file)')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version') help='Show version')
parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true',
help='Show chats of specified video')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
if Arguments().print_version: if Arguments().print_version:
print(f'pytchat v{__version__} © 2019 taizan-hokuto') print(f'pytchat v{__version__} © 2019,2020 taizan-hokuto')
return return
# Extractor # Extractor
@@ -53,6 +56,20 @@ def main():
parser.print_help() parser.print_help()
return return
# Echo
if Arguments().echo:
if len(Arguments().video_ids) > 1:
print("You can specify only one video ID.")
return
try:
Echo(Arguments().video_ids[0]).run()
except InvalidVideoIdException as e:
print("Invalid video id:", str(e))
except Exception as e:
print(type(e), str(e))
finally:
return
if not os.path.exists(Arguments().output): if not os.path.exists(Arguments().output):
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output)) print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
return return
@@ -77,10 +94,13 @@ class Runner:
path = util.checkpath(separated_path + video_id + '.html') path = util.checkpath(separated_path + video_id + '.html')
try: try:
info = VideoInfo(video_id) info = VideoInfo(video_id)
except Exception as e: except (PatternUnmatchError, JSONDecodeError) as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e))) print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().save_error_data: if Arguments().save_error_data:
util.save(str(e), "ERR", ".dat") util.save(str(e.doc), "ERR", ".dat")
continue
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
continue continue
print(f"\n" print(f"\n"

View File

@@ -19,6 +19,7 @@ class Arguments(metaclass=Singleton):
OUTPUT: str = 'output_dir' OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id' VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data' SAVE_ERROR_DATA: bool = 'save_error_data'
ECHO: bool = 'echo'
def __init__(self, def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None): arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -36,8 +37,9 @@ class Arguments(metaclass=Singleton):
self.output: str = arguments[Arguments.Name.OUTPUT] self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = [] self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA] self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
self.echo: bool = arguments[Arguments.Name.ECHO]
# Videos # Videos
if arguments[Arguments.Name.VIDEO_IDS]: if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')] for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]

22
pytchat/cli/echo.py Normal file
View File

@@ -0,0 +1,22 @@
import pytchat
from ..exceptions import ChatDataFinished, NoContents
from ..util.extract_video_id import extract_video_id
class Echo:
def __init__(self, video_id):
self.video_id = extract_video_id(video_id)
def run(self):
livechat = pytchat.create(self.video_id)
while livechat.is_alive():
chatdata = livechat.get()
for c in chatdata.sync_items():
print(f"{c.datetime} [{c.author.name}] {c.message} {c.amountString}")
try:
livechat.raise_for_status()
except (ChatDataFinished, NoContents):
print("Chat finished.")
except Exception as e:
print(type(e), str(e))

View File

@@ -1,5 +1,5 @@
''' '''
This code for this progress bar is based on This code is based on
vladignatyev/progress.py vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License) (MIT License)

View File

@@ -1,4 +1,4 @@
import logging import logging # noqa
from . import mylogger from . import mylogger
headers = { headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',

7
pytchat/core/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
from .pytchat import PytchatCore
from .. util.extract_video_id import extract_video_id
def create(video_id: str, **kwargs):
_vid = extract_video_id(video_id)
return PytchatCore(_vid, **kwargs)

207
pytchat/core/pytchat.py Normal file
View File

@@ -0,0 +1,207 @@
import httpx
import json
import signal
import time
import traceback
import urllib.parse
from ..parser.live import Parser
from .. import config
from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
class PytchatCore:
'''
Parameter
---------
video_id : str
seektime : int
start position of fetching chat (seconds).
This option is valid for archived chat only.
If negative value, chat data posted before the start of the broadcast
will be retrieved as well.
processor : ChatProcessor
interruptable : bool
Allows keyboard interrupts.
Set this parameter to False if your own threading program causes
the problem.
force_replay : bool
force to fetch archived chat data, even if specified video is live.
topchat_only : bool
If True, get only top chat.
hold_exception : bool [default:True]
If True, when exceptions occur, the exception is held internally,
and can be raised by raise_for_status().
Attributes
---------
_is_alive : bool
Flag to stop getting chat.
'''
_setup_finished = False
def __init__(self, video_id,
seektime=-1,
processor=DefaultProcessor(),
interruptable=True,
force_replay=False,
topchat_only=False,
hold_exception=True,
logger=config.logger(__name__),
):
self._video_id = extract_video_id(video_id)
self.seektime = seektime
if isinstance(processor, tuple):
self.processor = Combinator(processor)
else:
self.processor = processor
self._is_alive = True
self._is_replay = force_replay
self._hold_exception = hold_exception
self._exception_holder = None
self._parser = Parser(
is_replay=self._is_replay,
exception_holder=self._exception_holder
)
self._first_fetch = True
self._fetch_url = "live_chat/get_live_chat?continuation="
self._topchat_only = topchat_only
self._logger = logger
if interruptable:
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
self._setup()
def _setup(self):
time.sleep(0.1) # sleep shortly to prohibit skipping fetching data
"""Fetch first continuation parameter,
create and start _listen loop.
"""
self.continuation = liveparam.getparam(self._video_id, 3)
def _get_chat_component(self):
''' Fetch chat data and store them into buffer,
get next continuaiton parameter and loop.
Parameter
---------
continuation : str
parameter for next chat data
'''
try:
with httpx.Client(http2=True) as client:
if self.continuation and self._is_alive:
contents = self._get_contents(self.continuation, client, headers)
metadata, chatdata = self._parser.parse(contents)
timeout = metadata['timeoutMs'] / 1000
chat_component = {
"video_id": self._video_id,
"timeout": timeout,
"chatdata": chatdata
}
self.continuation = metadata.get('continuation')
return chat_component
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
self._raise_exception(e)
except (TypeError, json.JSONDecodeError) as e:
self._logger.error(f"{traceback.format_exc(limit=-1)}")
self._raise_exception(e)
self._logger.debug(f"[{self._video_id}]finished fetching chat.")
self._raise_exception(exceptions.ChatDataFinished)
def _get_contents(self, continuation, client, headers):
'''Get 'continuationContents' from livechat json.
If contents is None at first fetching,
try to fetch archive chat data.
Return:
-------
'continuationContents' which includes metadata & chat data.
'''
livechat_json = (
self._get_livechat_json(continuation, client, headers)
)
contents = self._parser.get_contents(livechat_json)
if self._first_fetch:
if contents is None or self._is_replay:
'''Try to fetch archive chat data.'''
self._parser.is_replay = True
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
continuation = arcparam.getparam(
self._video_id, self.seektime, self._topchat_only)
livechat_json = (self._get_livechat_json(continuation, client, headers))
reload_continuation = self._parser.reload_continuation(
self._parser.get_contents(livechat_json))
if reload_continuation:
livechat_json = (self._get_livechat_json(
reload_continuation, client, headers))
contents = self._parser.get_contents(livechat_json)
self._is_replay = True
self._first_fetch = False
return contents
def _get_livechat_json(self, continuation, client, headers):
'''
Get json which includes chat data.
'''
continuation = urllib.parse.quote(continuation)
livechat_json = None
err = None
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1):
with client:
try:
livechat_json = client.get(url, headers=headers).json()
break
except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
err = e
time.sleep(2)
continue
else:
self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count. Last error: {str(err)}")
self._raise_exception(exceptions.RetryExceedMaxCount())
return livechat_json
def get(self):
if self.is_alive():
chat_component = self._get_chat_component()
return self.processor.process([chat_component])
else:
return []
def is_replay(self):
return self._is_replay
def is_alive(self):
return self._is_alive
def terminate(self):
self._is_alive = False
self.processor.finalize()
def raise_for_status(self):
if self._exception_holder is not None:
raise self._exception_holder
def _raise_exception(self, exception: Exception = None):
self._is_alive = False
if self._hold_exception is False:
raise exception
self._exception_holder = exception

View File

@@ -4,13 +4,13 @@ import asyncio
class Buffer(asyncio.Queue): class Buffer(asyncio.Queue):
''' '''
チャットデータを格納するバッファの役割を持つFIFOキュー Buffer for storing chat data.
Parameter Parameter
--------- ---------
maxsize : int maxsize : int
格納するチャットブロックの最大個数。0の場合は無限。 Maximum number of chat blocks to be stored.
最大値を超える場合は古いチャットブロックから破棄される。 If it exceeds the maximum, the oldest chat block will be discarded.
''' '''
def __init__(self, maxsize=0): def __init__(self, maxsize=0):

View File

@@ -22,54 +22,51 @@ MAX_RETRY = 10
class LiveChatAsync: class LiveChatAsync:
'''asyncioを利用してYouTubeのライブ配信のチャットデータを取得する。 '''LiveChatAsync object fetches chat data and stores them
in a buffer with asyncio.
Parameter Parameter
--------- ---------
video_id : str video_id : str
動画ID
seektime : int seektime : int
(ライブチャット取得時は無視) start position of fetching chat (seconds).
取得開始するアーカイブ済みチャットの経過時間(秒) This option is valid for archived chat only.
マイナス値を指定した場合は、配信開始前のチャットも取得する。 If negative value, chat data posted before the start of the broadcast
will be retrieved as well.
processor : ChatProcessor processor : ChatProcessor
チャットデータを加工するオブジェクト
buffer : Buffer(maxsize:20[default]) buffer : Buffer
チャットデータchat_componentを格納するバッファ。 buffer of chat data fetched background.
maxsize : 格納できるchat_componentの個数
default値20個。1個で約5~10秒分。
interruptable : bool interruptable : bool
Ctrl+Cによる処理中断を行うかどうか。 Allows keyboard interrupts.
Set this parameter to False if your own threading program causes
the problem.
callback : func callback : func
_listen()関数から一定間隔で自動的に呼びだす関数。 function called periodically from _listen().
done_callback : func done_callback : func
listener終了時に呼び出すコールバック。 function called when listener ends.
exception_handler : func exception_handler : func
例外を処理する関数
direct_mode : bool direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。 If True, invoke specified callback function without using buffer.
Trueの場合、callbackの設定が必須 callback is required. If not, IllegalFunctionCall will be raised.
(設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても force to fetch archived chat data, even if specified video is live.
強制的にアーカイブ済みチャットを取得する。
topchat_only : bool topchat_only : bool
Trueの場合、上位チャットのみ取得する。 If True, get only top chat.
Attributes Attributes
--------- ---------
_is_alive : bool _is_alive : bool
チャット取得を停止するためのフラグ Flag to stop getting chat.
''' '''
_setup_finished = False _setup_finished = False
@@ -114,31 +111,30 @@ class LiveChatAsync:
self._set_exception_handler(exception_handler) self._set_exception_handler(exception_handler)
if interruptable: if interruptable:
signal.signal(signal.SIGINT, signal.signal(signal.SIGINT,
(lambda a, b: asyncio.create_task( (lambda a, b: self._keyboard_interrupt()))
LiveChatAsync.shutdown(None, signal.SIGINT, b))))
self._setup() self._setup()
def _setup(self): def _setup(self):
# direct modeがTrueでcallback未設定の場合例外発生。 # An exception is raised when direct mode is true and no callback is set.
if self._direct_mode: if self._direct_mode:
if self._callback is None: if self._callback is None:
raise exceptions.IllegalFunctionCall( raise exceptions.IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.") "When direct_mode=True, callback parameter is required.")
else: else:
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 # Create a default buffer if `direct_mode` is False and buffer is not set.
if self._buffer is None: if self._buffer is None:
self._buffer = Buffer(maxsize=20) self._buffer = Buffer(maxsize=20)
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成 # Create a loop task to call callback if the `callback` param is specified.
if self._callback is None: if self._callback is None:
pass pass
else: else:
# callbackを呼ぶループタスクの開始 # Create a loop task to call callback if the `callback` param is specified.
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.create_task(self._callback_loop(self._callback)) loop.create_task(self._callback_loop(self._callback))
# _listenループタスクの開始 # Start a loop task for _listen()
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
self.listen_task = loop.create_task(self._startlisten()) self.listen_task = loop.create_task(self._startlisten())
# add_done_callbackの登録 # Register add_done_callback
if self._done_callback is None: if self._done_callback is None:
self.listen_task.add_done_callback(self._finish) self.listen_task.add_done_callback(self._finish)
else: else:
@@ -194,7 +190,7 @@ class LiveChatAsync:
self._logger.error(f"{traceback.format_exc(limit = -1)}") self._logger.error(f"{traceback.format_exc(limit = -1)}")
raise raise
self._logger.debug(f"[{self._video_id}]finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
raise exceptions.ChatDataFinished raise exceptions.ChatDataFinished
async def _check_pause(self, continuation): async def _check_pause(self, continuation):
@@ -246,30 +242,30 @@ class LiveChatAsync:
''' '''
continuation = urllib.parse.quote(continuation) continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
status_code = 0
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
try: try:
resp = await client.get(url, headers=headers) resp = await client.get(url, headers=headers)
livechat_json = resp.json() livechat_json = resp.json()
break break
except (httpx.HTTPError, json.JSONDecodeError): except (json.JSONDecodeError, httpx.HTTPError):
await asyncio.sleep(1) await asyncio.sleep(1)
continue continue
else: else:
self._logger.error(f"[{self._video_id}]" self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count. status_code={status_code}") f"Exceeded retry count.")
return None return None
return livechat_json return livechat_json
async def _callback_loop(self, callback): async def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで """ If a callback is specified in the constructor,
callbackに指定された関数に一定間隔でチャットデータを投げる。 it throws chat data at regular intervals to the
function specified in the callback in the backgroun
Parameter Parameter
--------- ---------
callback : func callback : func
加工済みのチャットデータを渡す先の関数。 function to which the processed chat data is passed.
""" """
while self.is_alive(): while self.is_alive():
items = await self._buffer.get() items = await self._buffer.get()
@@ -280,11 +276,13 @@ class LiveChatAsync:
await self._callback(processed_chat) await self._callback(processed_chat)
async def get(self): async def get(self):
""" bufferからデータを取り出し、processorに投げ、 """
加工済みのチャットデータを返す。 Retrieves data from the buffer,
throws it to the processor,
and returns the processed chat data.
Returns Returns
: Processorによって加工されたチャットデータ : Chat data processed by the Processor
""" """
if self._callback is None: if self._callback is None:
if self.is_alive(): if self.is_alive():
@@ -293,7 +291,7 @@ class LiveChatAsync:
else: else:
return [] return []
raise exceptions.IllegalFunctionCall( raise exceptions.IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。") "Callback parameter is already set, so get() cannot be performed.")
def is_replay(self): def is_replay(self):
return self._is_replay return self._is_replay
@@ -314,11 +312,11 @@ class LiveChatAsync:
return self._is_alive return self._is_alive
def _finish(self, sender): def _finish(self, sender):
'''Listener終了時のコールバック''' '''Called when the _listen() task finished.'''
try: try:
self._task_finished() self._task_finished()
except CancelledError: except CancelledError:
self._logger.debug(f'[{self._video_id}]cancelled:{sender}') self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self): def terminate(self):
if self._pauser.empty(): if self._pauser.empty():
@@ -326,10 +324,14 @@ class LiveChatAsync:
self._is_alive = False self._is_alive = False
self._buffer.put_nowait({}) self._buffer.put_nowait({})
self.processor.finalize() self.processor.finalize()
def _keyboard_interrupt(self):
self.exception = exceptions.ChatDataFinished()
self.terminate()
def _task_finished(self): def _task_finished(self):
''' '''
Listenerを終了する。 Terminate fetching chats.
''' '''
if self.is_alive(): if self.is_alive():
self.terminate() self.terminate()
@@ -339,7 +341,7 @@ class LiveChatAsync:
self.exception = e self.exception = e
if not isinstance(e, exceptions.ChatParseException): if not isinstance(e, exceptions.ChatParseException):
self._logger.error(f'Internal exception - {type(e)}{str(e)}') self._logger.error(f'Internal exception - {type(e)}{str(e)}')
self._logger.info(f'[{self._video_id}]終了しました') self._logger.info(f'[{self._video_id}] finished.')
def raise_for_status(self): def raise_for_status(self):
if self.exception is not None: if self.exception is not None:
@@ -349,15 +351,3 @@ class LiveChatAsync:
def _set_exception_handler(cls, handler): def _set_exception_handler(cls, handler):
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.set_exception_handler(handler) loop.set_exception_handler(handler)
@classmethod
async def shutdown(cls, event, sig=None, handler=None):
cls._logger.debug("shutdown...")
tasks = [t for t in asyncio.all_tasks() if t is not
asyncio.current_task()]
[task.cancel() for task in tasks]
cls._logger.debug("complete remaining tasks...")
await asyncio.gather(*tasks, return_exceptions=True)
loop = asyncio.get_event_loop()
loop.stop()

View File

@@ -4,13 +4,13 @@ import queue
class Buffer(queue.Queue): class Buffer(queue.Queue):
''' '''
チャットデータを格納するバッファの役割を持つFIFOキュー Buffer for storing chat data.
Parameter Parameter
--------- ---------
max_size : int maxsize : int
格納するチャットブロックの最大個数。0の場合は無限。 Maximum number of chat blocks to be stored.
最大値を超える場合は古いチャットブロックから破棄される。 If it exceeds the maximum, the oldest chat block will be discarded.
''' '''
def __init__(self, maxsize=0): def __init__(self, maxsize=0):

View File

@@ -21,54 +21,53 @@ MAX_RETRY = 10
class LiveChat: class LiveChat:
''' スレッドプールを利用してYouTubeのライブ配信のチャットデータを取得する '''
LiveChat object fetches chat data and stores them
in a buffer with ThreadpoolExecutor.
Parameter Parameter
--------- ---------
video_id : str video_id : str
動画ID
seektime : int seektime : int
(ライブチャット取得時は無視) start position of fetching chat (seconds).
取得開始するアーカイブ済みチャットの経過時間(秒) This option is valid for archived chat only.
マイナス値を指定した場合は、配信開始前のチャットも取得する。 If negative value, chat data posted before the start of the broadcast
will be retrieved as well.
processor : ChatProcessor processor : ChatProcessor
チャットデータを加工するオブジェクト
buffer : Buffer(maxsize:20[default]) buffer : Buffer
チャットデータchat_componentを格納するバッファ。 buffer of chat data fetched background.
maxsize : 格納できるchat_componentの個数
default値20個。1個で約5~10秒分。
interruptable : bool interruptable : bool
Ctrl+Cによる処理中断を行うかどうか。 Allows keyboard interrupts.
Set this parameter to False if your own threading program causes
the problem.
callback : func callback : func
_listen()関数から一定間隔で自動的に呼びだす関数。 function called periodically from _listen().
done_callback : func done_callback : func
listener終了時に呼び出すコールバック。 function called when listener ends.
direct_mode : bool direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。 If True, invoke specified callback function without using buffer.
Trueの場合、callbackの設定が必須 callback is required. If not, IllegalFunctionCall will be raised.
(設定していない場合IllegalFunctionCall例外を発生させる
force_replay : bool force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても force to fetch archived chat data, even if specified video is live.
強制的にアーカイブ済みチャットを取得する。
topchat_only : bool topchat_only : bool
Trueの場合、上位チャットのみ取得する。 If True, get only top chat.
Attributes Attributes
--------- ---------
_executor : ThreadPoolExecutor _executor : ThreadPoolExecutor
チャットデータ取得ループ_listen用のスレッド This is used for _listen() loop.
_is_alive : bool _is_alive : bool
チャット取得を停止するためのフラグ Flag to stop getting chat.
''' '''
_setup_finished = False _setup_finished = False
@@ -112,24 +111,24 @@ class LiveChat:
self._setup() self._setup()
def _setup(self): def _setup(self):
# direct modeがTrueでcallback未設定の場合例外発生。 # An exception is raised when direct mode is true and no callback is set.
if self._direct_mode: if self._direct_mode:
if self._callback is None: if self._callback is None:
raise exceptions.IllegalFunctionCall( raise exceptions.IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.") "When direct_mode=True, callback parameter is required.")
else: else:
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成 # Create a default buffer if `direct_mode` is False and buffer is not set.
if self._buffer is None: if self._buffer is None:
self._buffer = Buffer(maxsize=20) self._buffer = Buffer(maxsize=20)
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成 # Create a loop task to call callback if the `callback` param is specified.
if self._callback is None: if self._callback is None:
pass pass
else: else:
# callbackを呼ぶループタスクの開始 # Start a loop task calling callback function.
self._executor.submit(self._callback_loop, self._callback) self._executor.submit(self._callback_loop, self._callback)
# _listenループタスクの開始 # Start a loop task for _listen()
self.listen_task = self._executor.submit(self._startlisten) self.listen_task = self._executor.submit(self._startlisten)
# add_done_callbackの登録 # Register add_done_callback
if self._done_callback is None: if self._done_callback is None:
self.listen_task.add_done_callback(self._finish) self.listen_task.add_done_callback(self._finish)
else: else:
@@ -184,7 +183,7 @@ class LiveChat:
self._logger.error(f"{traceback.format_exc(limit=-1)}") self._logger.error(f"{traceback.format_exc(limit=-1)}")
raise raise
self._logger.debug(f"[{self._video_id}]finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
raise exceptions.ChatDataFinished raise exceptions.ChatDataFinished
def _check_pause(self, continuation): def _check_pause(self, continuation):
@@ -236,30 +235,30 @@ class LiveChat:
''' '''
continuation = urllib.parse.quote(continuation) continuation = urllib.parse.quote(continuation)
livechat_json = None livechat_json = None
status_code = 0
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1" url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1): for _ in range(MAX_RETRY + 1):
with client: with client:
try: try:
livechat_json = client.get(url, headers=headers).json() livechat_json = client.get(url, headers=headers).json()
break break
except json.JSONDecodeError: except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(1) time.sleep(2)
continue continue
else: else:
self._logger.error(f"[{self._video_id}]" self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count. status_code={status_code}") f"Exceeded retry count.")
raise exceptions.RetryExceedMaxCount() raise exceptions.RetryExceedMaxCount()
return livechat_json return livechat_json
def _callback_loop(self, callback): def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで """ If a callback is specified in the constructor,
callbackに指定された関数に一定間隔でチャットデータを投げる。 it throws chat data at regular intervals to the
function specified in the callback in the backgroun
Parameter Parameter
--------- ---------
callback : func callback : func
加工済みのチャットデータを渡す先の関数。 function to which the processed chat data is passed.
""" """
while self.is_alive(): while self.is_alive():
items = self._buffer.get() items = self._buffer.get()
@@ -270,11 +269,13 @@ class LiveChat:
self._callback(processed_chat) self._callback(processed_chat)
def get(self): def get(self):
""" bufferからデータを取り出し、processorに投げ、 """
加工済みのチャットデータを返す。 Retrieves data from the buffer,
throws it to the processor,
and returns the processed chat data.
Returns Returns
: Processorによって加工されたチャットデータ : Chat data processed by the Processor
""" """
if self._callback is None: if self._callback is None:
if self.is_alive(): if self.is_alive():
@@ -283,7 +284,7 @@ class LiveChat:
else: else:
return [] return []
raise exceptions.IllegalFunctionCall( raise exceptions.IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。") "Callback parameter is already set, so get() cannot be performed.")
def is_replay(self): def is_replay(self):
return self._is_replay return self._is_replay
@@ -304,13 +305,16 @@ class LiveChat:
return self._is_alive return self._is_alive
def _finish(self, sender): def _finish(self, sender):
'''Listener終了時のコールバック''' '''Called when the _listen() task finished.'''
try: try:
self._task_finished() self._task_finished()
except CancelledError: except CancelledError:
self._logger.debug(f'[{self._video_id}]cancelled:{sender}') self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self): def terminate(self):
'''
Terminate fetching chats.
'''
if self._pauser.empty(): if self._pauser.empty():
self._pauser.put_nowait(None) self._pauser.put_nowait(None)
self._is_alive = False self._is_alive = False
@@ -319,9 +323,6 @@ class LiveChat:
self.processor.finalize() self.processor.finalize()
def _task_finished(self): def _task_finished(self):
'''
Listenerを終了する。
'''
if self.is_alive(): if self.is_alive():
self.terminate() self.terminate()
try: try:
@@ -330,7 +331,7 @@ class LiveChat:
self.exception = e self.exception = e
if not isinstance(e, exceptions.ChatParseException): if not isinstance(e, exceptions.ChatParseException):
self._logger.error(f'Internal exception - {type(e)}{str(e)}') self._logger.error(f'Internal exception - {type(e)}{str(e)}')
self._logger.info(f'[{self._video_id}]終了しました') self._logger.info(f'[{self._video_id}] finished.')
def raise_for_status(self): def raise_for_status(self):
if self.exception is not None: if self.exception is not None:

View File

@@ -76,6 +76,6 @@ class PatternUnmatchError(VideoInfoParseError):
''' '''
Thrown when failed to parse video info with unmatched pattern. Thrown when failed to parse video info with unmatched pattern.
''' '''
def __init__(self, doc): def __init__(self, doc=''):
self.msg = "PatternUnmatchError" self.msg = "PatternUnmatchError"
self.doc = doc self.doc = doc

View File

@@ -1,133 +0,0 @@
from base64 import urlsafe_b64encode as b64enc
from functools import reduce
import urllib.parse
'''
Generate continuation parameter of youtube replay chat.
Author: taizan-hokuto (2019) @taizan205
ver 0.0.1 2019.10.05
'''
def _gen_vid_long(video_id):
"""generate video_id parameter.
Parameter
---------
video_id : str
Return
---------
byte[] : base64 encoded video_id parameter.
"""
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
header_id = video_id.encode()
header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B'
header_terminator = b'\x20\x01'
item = [
header_magic,
_nval(len(header_id)),
header_id,
header_sep_1,
header_id,
header_terminator
]
return urllib.parse.quote(
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _gen_vid(video_id):
"""generate video_id parameter.
Parameter
---------
video_id : str
Return
---------
bytes : base64 encoded video_id parameter.
"""
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
header_id = video_id.encode()
header_terminator = b'\x20\x01'
item = [
header_magic,
_nval(len(header_id)),
header_id,
header_terminator
]
return urllib.parse.quote(
b64enc(reduce(lambda x, y: x + y, item)).decode()
).encode()
def _nval(val):
"""convert value to byte array"""
if val < 0:
raise ValueError
buf = b''
while val >> 7:
m = val & 0xFF | 0x80
buf += m.to_bytes(1, 'big')
val >>= 7
buf += val.to_bytes(1, 'big')
return buf
def _build(video_id, seektime, topchat_only):
switch_01 = b'\x04' if topchat_only else b'\x01'
if seektime < 0:
raise ValueError("seektime must be greater than or equal to zero.")
if seektime == 0:
times = b''
else:
times = _nval(int(seektime * 1000))
if seektime > 0:
_len_time = b'\x5A' + (len(times) + 1).to_bytes(1, 'big') + b'\x10'
else:
_len_time = b''
header_magic = b'\xA2\x9D\xB0\xD3\x04'
sep_0 = b'\x1A'
vid = _gen_vid(video_id)
_tag = b'\x40\x01'
timestamp1 = times
sep_1 = b'\x60\x04\x72\x02\x08'
terminator = b'\x78\x01'
body = [
sep_0,
_nval(len(vid)),
vid,
_tag,
_len_time,
timestamp1,
sep_1,
switch_01,
terminator
]
body = reduce(lambda x, y: x + y, body)
return urllib.parse.quote(
b64enc(header_magic + _nval(len(body)) + body
).decode()
)
def getparam(video_id, seektime=0.0, topchat_only=False):
'''
Parameter
---------
seektime : int
unit:seconds
start position of fetching chat data.
topchat_only : bool
if True, fetch only 'top chat'
'''
return _build(video_id, seektime, topchat_only)

View File

@@ -8,15 +8,26 @@ from .. import exceptions
class Parser: class Parser:
'''
Parser of chat json.
Parameter
----------
is_replay : bool
__slots__ = ['is_replay'] exception_holder : Object [default:Npne]
The object holding exceptions.
This is passed from the parent livechat object.
'''
__slots__ = ['is_replay', 'exception_holder']
def __init__(self, is_replay): def __init__(self, is_replay, exception_holder=None):
self.is_replay = is_replay self.is_replay = is_replay
self.exception_holder = exception_holder
def get_contents(self, jsn): def get_contents(self, jsn):
if jsn is None: if jsn is None:
raise exceptions.IllegalFunctionCall('Called with none JSON object.') self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn['response']['responseContext'].get('errors'): if jsn['response']['responseContext'].get('errors'):
raise exceptions.ResponseContextError( raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.') 'The video_id would be wrong, or video is deleted or private.')
@@ -42,11 +53,11 @@ class Parser:
if contents is None: if contents is None:
'''Broadcasting end or cannot fetch chat stream''' '''Broadcasting end or cannot fetch chat stream'''
raise exceptions.NoContents('Chat data stream is empty.') self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
cont = contents['liveChatContinuation']['continuations'][0] cont = contents['liveChatContinuation']['continuations'][0]
if cont is None: if cont is None:
raise exceptions.NoContinuation('No Continuation') self.raise_exception(exceptions.NoContinuation('No Continuation'))
metadata = (cont.get('invalidationContinuationData') metadata = (cont.get('invalidationContinuationData')
or cont.get('timedContinuationData') or cont.get('timedContinuationData')
or cont.get('reloadContinuationData') or cont.get('reloadContinuationData')
@@ -54,13 +65,13 @@ class Parser:
) )
if metadata is None: if metadata is None:
if cont.get("playerSeekContinuationData"): if cont.get("playerSeekContinuationData"):
raise exceptions.ChatDataFinished('Finished chat data') self.raise_exception(exceptions.ChatDataFinished('Finished chat data'))
unknown = list(cont.keys())[0] unknown = list(cont.keys())[0]
if unknown: if unknown:
raise exceptions.ReceivedUnknownContinuation( self.raise_exception(exceptions.ReceivedUnknownContinuation(
f"Received unknown continuation type:{unknown}") f"Received unknown continuation type:{unknown}"))
else: else:
raise exceptions.FailedExtractContinuation('Cannot extract continuation data') self.raise_exception(exceptions.FailedExtractContinuation('Cannot extract continuation data'))
return self._create_data(metadata, contents) return self._create_data(metadata, contents)
def reload_continuation(self, contents): def reload_continuation(self, contents):
@@ -72,7 +83,7 @@ class Parser:
""" """
if contents is None: if contents is None:
'''Broadcasting end or cannot fetch chat stream''' '''Broadcasting end or cannot fetch chat stream'''
raise exceptions.NoContents('Chat data stream is empty.') self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
cont = contents['liveChatContinuation']['continuations'][0] cont = contents['liveChatContinuation']['continuations'][0]
if cont.get("liveChatReplayContinuationData"): if cont.get("liveChatReplayContinuationData"):
# chat data exist. # chat data exist.
@@ -81,7 +92,7 @@ class Parser:
init_cont = cont.get("playerSeekContinuationData") init_cont = cont.get("playerSeekContinuationData")
if init_cont: if init_cont:
return init_cont.get("continuation") return init_cont.get("continuation")
raise exceptions.ChatDataFinished('Finished chat data') self.raise_exception(exceptions.ChatDataFinished('Finished chat data'))
def _create_data(self, metadata, contents): def _create_data(self, metadata, contents):
actions = contents['liveChatContinuation'].get('actions') actions = contents['liveChatContinuation'].get('actions')
@@ -103,3 +114,8 @@ class Parser:
start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"]) start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"]) last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
return (last - start) return (last - start)
def raise_exception(self, exception):
if self.exception_holder is None:
raise exception
self.exception_holder = exception

View File

@@ -36,3 +36,7 @@ class Combinator(ChatProcessor):
''' '''
return tuple(processor.process(chat_components) return tuple(processor.process(chat_components)
for processor in self.processors) for processor in self.processors)
def finalize(self, *args, **kwargs):
[processor.finalize(*args, **kwargs)
for processor in self.processors]

View File

@@ -0,0 +1,11 @@
import json
from .renderer.base import Author
from .renderer.paidmessage import Colors
from .renderer.paidsticker import Colors2
class CustomEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, Author) or isinstance(obj, Colors) or isinstance(obj, Colors2):
return vars(obj)
return json.JSONEncoder.default(self, obj)

View File

@@ -1,5 +1,7 @@
import asyncio import asyncio
import json
import time import time
from .custom_encoder import CustomEncoder
from .renderer.textmessage import LiveChatTextMessageRenderer from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer
@@ -11,25 +13,120 @@ from ... import config
logger = config.logger(__name__) logger = config.logger(__name__)
class Chat:
def json(self) -> str:
return json.dumps(vars(self), ensure_ascii=False, cls=CustomEncoder)
class Chatdata: class Chatdata:
def __init__(self, chatlist: list, timeout: float):
def __init__(self, chatlist: list, timeout: float, abs_diff):
self.items = chatlist self.items = chatlist
self.interval = timeout self.interval = timeout
self.abs_diff = abs_diff
self.itemcount = 0
def tick(self): def tick(self):
if self.interval == 0: '''DEPRECATE
Use sync_items()
'''
if len(self.items) < 1:
time.sleep(1) time.sleep(1)
return return
time.sleep(self.interval / len(self.items)) if self.itemcount == 0:
self.starttime = time.time()
if len(self.items) == 1:
total_itemcount = 1
else:
total_itemcount = len(self.items) - 1
next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000
tobe_disptime = self.abs_diff + next_chattime
wait_sec = tobe_disptime - time.time()
self.itemcount += 1
if wait_sec < 0:
wait_sec = 0
time.sleep(wait_sec)
async def tick_async(self): async def tick_async(self):
if self.interval == 0: '''DEPRECATE
Use async_items()
'''
if len(self.items) < 1:
await asyncio.sleep(1) await asyncio.sleep(1)
return return
await asyncio.sleep(self.interval / len(self.items)) if self.itemcount == 0:
self.starttime = time.time()
if len(self.items) == 1:
total_itemcount = 1
else:
total_itemcount = len(self.items) - 1
next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000
tobe_disptime = self.abs_diff + next_chattime
wait_sec = tobe_disptime - time.time()
self.itemcount += 1
if wait_sec < 0:
wait_sec = 0
await asyncio.sleep(wait_sec)
def sync_items(self):
starttime = time.time()
if len(self.items) > 0:
last_chattime = self.items[-1].timestamp / 1000
tobe_disptime = self.abs_diff + last_chattime
wait_total_sec = max(tobe_disptime - time.time(), 0)
if len(self.items) > 1:
wait_sec = wait_total_sec / len(self.items)
elif len(self.items) == 1:
wait_sec = 0
for c in self.items:
if wait_sec < 0:
wait_sec = 0
time.sleep(wait_sec)
yield c
stop_interval = time.time() - starttime
if stop_interval < 1:
time.sleep(1 - stop_interval)
async def async_items(self):
starttime = time.time()
if len(self.items) > 0:
last_chattime = self.items[-1].timestamp / 1000
tobe_disptime = self.abs_diff + last_chattime
wait_total_sec = max(tobe_disptime - time.time(), 0)
if len(self.items) > 1:
wait_sec = wait_total_sec / len(self.items)
elif len(self.items) == 1:
wait_sec = 0
for c in self.items:
if wait_sec < 0:
wait_sec = 0
await asyncio.sleep(wait_sec)
yield c
stop_interval = time.time() - starttime
if stop_interval < 1:
await asyncio.sleep(1 - stop_interval)
def json(self) -> str:
return json.dumps([vars(a) for a in self.items], ensure_ascii=False, cls=CustomEncoder)
class DefaultProcessor(ChatProcessor): class DefaultProcessor(ChatProcessor):
def __init__(self):
self.first = True
self.abs_diff = 0
self.renderers = {
"liveChatTextMessageRenderer": LiveChatTextMessageRenderer(),
"liveChatPaidMessageRenderer": LiveChatPaidMessageRenderer(),
"liveChatPaidStickerRenderer": LiveChatPaidStickerRenderer(),
"liveChatLegacyPaidMessageRenderer": LiveChatLegacyPaidMessageRenderer(),
"liveChatMembershipItemRenderer": LiveChatMembershipItemRenderer()
}
def process(self, chat_components: list): def process(self, chat_components: list):
chatlist = [] chatlist = []
@@ -37,6 +134,8 @@ class DefaultProcessor(ChatProcessor):
if chat_components: if chat_components:
for component in chat_components: for component in chat_components:
if component is None:
continue
timeout += component.get('timeout', 0) timeout += component.get('timeout', 0)
chatdata = component.get('chatdata') chatdata = component.get('chatdata')
if chatdata is None: if chatdata is None:
@@ -46,43 +145,35 @@ class DefaultProcessor(ChatProcessor):
continue continue
if action.get('addChatItemAction') is None: if action.get('addChatItemAction') is None:
continue continue
if action['addChatItemAction'].get('item') is None: item = action['addChatItemAction'].get('item')
if item is None:
continue continue
chat = self._parse(item)
chat = self._parse(action)
if chat: if chat:
chatlist.append(chat) chatlist.append(chat)
return Chatdata(chatlist, float(timeout))
if self.first and chatlist:
self.abs_diff = time.time() - chatlist[0].timestamp / 1000 + 2
self.first = False
def _parse(self, sitem): chatdata = Chatdata(chatlist, float(timeout), self.abs_diff)
action = sitem.get("addChatItemAction")
if action: return chatdata
item = action.get("item")
if item is None: def _parse(self, item):
return None
try: try:
renderer = self._get_renderer(item) key = list(item.keys())[0]
renderer = self.renderers.get(key)
if renderer is None: if renderer is None:
return None return None
renderer.setitem(item.get(key), Chat())
renderer.settype()
renderer.get_snippet() renderer.get_snippet()
renderer.get_authordetails() renderer.get_authordetails()
rendered_chatobj = renderer.get_chatobj()
renderer.clear()
except (KeyError, TypeError) as e: except (KeyError, TypeError) as e:
logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}") logger.error(f"{str(type(e))}-{str(e)} item:{str(item)}")
return None return None
return renderer
return rendered_chatobj
def _get_renderer(self, item):
if item.get("liveChatTextMessageRenderer"):
renderer = LiveChatTextMessageRenderer(item)
elif item.get("liveChatPaidMessageRenderer"):
renderer = LiveChatPaidMessageRenderer(item)
elif item.get("liveChatPaidStickerRenderer"):
renderer = LiveChatPaidStickerRenderer(item)
elif item.get("liveChatLegacyPaidMessageRenderer"):
renderer = LiveChatLegacyPaidMessageRenderer(item)
elif item.get("liveChatMembershipItemRenderer"):
renderer = LiveChatMembershipItemRenderer(item)
else:
renderer = None
return renderer

View File

@@ -6,89 +6,96 @@ class Author:
class BaseRenderer: class BaseRenderer:
def __init__(self, item, chattype): def setitem(self, item, chat):
self.renderer = list(item.values())[0] self.item = item
self.chattype = chattype self.chat = chat
self.author = Author() self.chat.author = Author()
def settype(self):
pass
def get_snippet(self): def get_snippet(self):
self.type = self.chattype self.chat.id = self.item.get('id')
self.id = self.renderer.get('id') timestampUsec = int(self.item.get("timestampUsec", 0))
timestampUsec = int(self.renderer.get("timestampUsec", 0)) self.chat.timestamp = int(timestampUsec / 1000)
self.timestamp = int(timestampUsec / 1000) tst = self.item.get("timestampText")
tst = self.renderer.get("timestampText")
if tst: if tst:
self.elapsedTime = tst.get("simpleText") self.chat.elapsedTime = tst.get("simpleText")
else: else:
self.elapsedTime = "" self.chat.elapsedTime = ""
self.datetime = self.get_datetime(timestampUsec) self.chat.datetime = self.get_datetime(timestampUsec)
self.message, self.messageEx = self.get_message(self.renderer) self.chat.message, self.chat.messageEx = self.get_message(self.item)
self.id = self.renderer.get('id') self.chat.id = self.item.get('id')
self.amountValue = 0.0 self.chat.amountValue = 0.0
self.amountString = "" self.chat.amountString = ""
self.currency = "" self.chat.currency = ""
self.bgColor = 0 self.chat.bgColor = 0
def get_authordetails(self): def get_authordetails(self):
self.author.badgeUrl = "" self.chat.author.badgeUrl = ""
(self.author.isVerified, (self.chat.author.isVerified,
self.author.isChatOwner, self.chat.author.isChatOwner,
self.author.isChatSponsor, self.chat.author.isChatSponsor,
self.author.isChatModerator) = ( self.chat.author.isChatModerator) = (
self.get_badges(self.renderer) self.get_badges(self.item)
) )
self.author.channelId = self.renderer.get("authorExternalChannelId") self.chat.author.channelId = self.item.get("authorExternalChannelId")
self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId self.chat.author.channelUrl = "http://www.youtube.com/channel/" + self.chat.author.channelId
self.author.name = self.renderer["authorName"]["simpleText"] self.chat.author.name = self.item["authorName"]["simpleText"]
self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"] self.chat.author.imageUrl = self.item["authorPhoto"]["thumbnails"][1]["url"]
def get_message(self, renderer): def get_message(self, item):
message = '' message = ''
message_ex = [] message_ex = []
if renderer.get("message"): runs = item.get("message", {}).get("runs", {})
runs = renderer["message"].get("runs") for r in runs:
if runs: if not hasattr(r, "get"):
for r in runs: continue
if r: if r.get('emoji'):
if r.get('emoji'): message += r['emoji'].get('shortcuts', [''])[0]
message += r['emoji'].get('shortcuts', [''])[0] message_ex.append({
message_ex.append({ 'id': r['emoji'].get('emojiId').split('/')[-1],
'id': r['emoji'].get('emojiId').split('/')[-1], 'txt': r['emoji'].get('shortcuts', [''])[0],
'txt': r['emoji'].get('shortcuts', [''])[0], 'url': r['emoji']['image']['thumbnails'][0].get('url')
'url': r['emoji']['image']['thumbnails'][0].get('url') })
}) else:
else: message += r.get('text', '')
message += r.get('text', '') message_ex.append(r.get('text', ''))
message_ex.append(r.get('text', ''))
return message, message_ex return message, message_ex
def get_badges(self, renderer): def get_badges(self, renderer):
self.author.type = '' self.chat.author.type = ''
isVerified = False isVerified = False
isChatOwner = False isChatOwner = False
isChatSponsor = False isChatSponsor = False
isChatModerator = False isChatModerator = False
badges = renderer.get("authorBadges") badges = renderer.get("authorBadges", {})
if badges: for badge in badges:
for badge in badges: if badge["liveChatAuthorBadgeRenderer"].get("icon"):
if badge["liveChatAuthorBadgeRenderer"].get("icon"): author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"] self.chat.author.type = author_type
self.author.type = author_type if author_type == 'VERIFIED':
if author_type == 'VERIFIED': isVerified = True
isVerified = True if author_type == 'OWNER':
if author_type == 'OWNER': isChatOwner = True
isChatOwner = True if author_type == 'MODERATOR':
if author_type == 'MODERATOR': isChatModerator = True
isChatModerator = True if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"):
if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"): isChatSponsor = True
isChatSponsor = True self.chat.author.type = 'MEMBER'
self.author.type = 'MEMBER' self.get_badgeurl(badge)
self.get_badgeurl(badge)
return isVerified, isChatOwner, isChatSponsor, isChatModerator return isVerified, isChatOwner, isChatSponsor, isChatModerator
def get_badgeurl(self, badge): def get_badgeurl(self, badge):
self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"] self.chat.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"]
def get_datetime(self, timestamp): def get_datetime(self, timestamp):
dt = datetime.fromtimestamp(timestamp / 1000000) dt = datetime.fromtimestamp(timestamp / 1000000)
return dt.strftime('%Y-%m-%d %H:%M:%S') return dt.strftime('%Y-%m-%d %H:%M:%S')
def get_chatobj(self):
return self.chat
def clear(self):
self.item = None
self.chat = None

View File

@@ -2,14 +2,14 @@ from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer): class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
def __init__(self, item): def settype(self):
super().__init__(item, "newSponsor") self.chat.type = "newSponsor"
def get_authordetails(self): def get_authordetails(self):
super().get_authordetails() super().get_authordetails()
self.author.isChatSponsor = True self.chat.author.isChatSponsor = True
def get_message(self, renderer): def get_message(self, item):
message = (renderer["eventText"]["runs"][0]["text"] message = (item["eventText"]["runs"][0]["text"]
) + ' / ' + (renderer["detailText"]["simpleText"]) ) + ' / ' + (item["detailText"]["simpleText"])
return message, [message] return message, [message]

View File

@@ -2,14 +2,17 @@ from .base import BaseRenderer
class LiveChatMembershipItemRenderer(BaseRenderer): class LiveChatMembershipItemRenderer(BaseRenderer):
def __init__(self, item): def settype(self):
super().__init__(item, "newSponsor") self.chat.type = "newSponsor"
def get_authordetails(self): def get_authordetails(self):
super().get_authordetails() super().get_authordetails()
self.author.isChatSponsor = True self.chat.author.isChatSponsor = True
def get_message(self, renderer): def get_message(self, item):
message = ''.join([mes.get("text", "") try:
for mes in renderer["headerSubtext"]["runs"]]) message = ''.join([mes.get("text", "")
for mes in item["headerSubtext"]["runs"]])
except KeyError:
return "Welcome New Member!", ["Welcome New Member!"]
return message, [message] return message, [message]

View File

@@ -9,23 +9,23 @@ class Colors:
class LiveChatPaidMessageRenderer(BaseRenderer): class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item): def settype(self):
super().__init__(item, "superChat") self.chat.type = "superChat"
def get_snippet(self): def get_snippet(self):
super().get_snippet() super().get_snippet()
amountDisplayString, symbol, amount = ( amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer) self.get_amountdata(self.item)
) )
self.amountValue = amount self.chat.amountValue = amount
self.amountString = amountDisplayString self.chat.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol symbol) else symbol
self.bgColor = self.renderer.get("bodyBackgroundColor", 0) self.chat.bgColor = self.item.get("bodyBackgroundColor", 0)
self.colors = self.get_colors() self.chat.colors = self.get_colors()
def get_amountdata(self, renderer): def get_amountdata(self, item):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"] amountDisplayString = item["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString) m = superchat_regex.search(amountDisplayString)
if m: if m:
symbol = m.group(1) symbol = m.group(1)
@@ -36,11 +36,12 @@ class LiveChatPaidMessageRenderer(BaseRenderer):
return amountDisplayString, symbol, amount return amountDisplayString, symbol, amount
def get_colors(self): def get_colors(self):
item = self.item
colors = Colors() colors = Colors()
colors.headerBackgroundColor = self.renderer.get("headerBackgroundColor", 0) colors.headerBackgroundColor = item.get("headerBackgroundColor", 0)
colors.headerTextColor = self.renderer.get("headerTextColor", 0) colors.headerTextColor = item.get("headerTextColor", 0)
colors.bodyBackgroundColor = self.renderer.get("bodyBackgroundColor", 0) colors.bodyBackgroundColor = item.get("bodyBackgroundColor", 0)
colors.bodyTextColor = self.renderer.get("bodyTextColor", 0) colors.bodyTextColor = item.get("bodyTextColor", 0)
colors.timestampColor = self.renderer.get("timestampColor", 0) colors.timestampColor = item.get("timestampColor", 0)
colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) colors.authorNameTextColor = item.get("authorNameTextColor", 0)
return colors return colors

View File

@@ -4,30 +4,30 @@ from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$") superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class Colors: class Colors2:
pass pass
class LiveChatPaidStickerRenderer(BaseRenderer): class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item): def settype(self):
super().__init__(item, "superSticker") self.chat.type = "superSticker"
def get_snippet(self): def get_snippet(self):
super().get_snippet() super().get_snippet()
amountDisplayString, symbol, amount = ( amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer) self.get_amountdata(self.item)
) )
self.amountValue = amount self.chat.amountValue = amount
self.amountString = amountDisplayString self.chat.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get( self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol symbol) else symbol
self.bgColor = self.renderer.get("backgroundColor", 0) self.chat.bgColor = self.item.get("backgroundColor", 0)
self.sticker = "".join(("https:", self.chat.sticker = "".join(("https:",
self.renderer["sticker"]["thumbnails"][0]["url"])) self.item["sticker"]["thumbnails"][0]["url"]))
self.colors = self.get_colors() self.chat.colors = self.get_colors()
def get_amountdata(self, renderer): def get_amountdata(self, item):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"] amountDisplayString = item["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString) m = superchat_regex.search(amountDisplayString)
if m: if m:
symbol = m.group(1) symbol = m.group(1)
@@ -38,9 +38,10 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
return amountDisplayString, symbol, amount return amountDisplayString, symbol, amount
def get_colors(self): def get_colors(self):
colors = Colors() item = self.item
colors.moneyChipBackgroundColor = self.renderer.get("moneyChipBackgroundColor", 0) colors = Colors2()
colors.moneyChipTextColor = self.renderer.get("moneyChipTextColor", 0) colors.moneyChipBackgroundColor = item.get("moneyChipBackgroundColor", 0)
colors.backgroundColor = self.renderer.get("backgroundColor", 0) colors.moneyChipTextColor = item.get("moneyChipTextColor", 0)
colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0) colors.backgroundColor = item.get("backgroundColor", 0)
colors.authorNameTextColor = item.get("authorNameTextColor", 0)
return colors return colors

View File

@@ -2,5 +2,5 @@ from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer): class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item): def settype(self):
super().__init__(item, "textMessage") self.chat.type = "textMessage"

View File

@@ -3,7 +3,7 @@ import os
import re import re
import time import time
from base64 import standard_b64encode from base64 import standard_b64encode
from httpx import NetworkError, ReadTimeout from concurrent.futures import ThreadPoolExecutor
from .chat_processor import ChatProcessor from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor from .default.processor import DefaultProcessor
from ..exceptions import UnknownConnectionError from ..exceptions import UnknownConnectionError
@@ -48,12 +48,14 @@ class HTMLArchiver(ChatProcessor):
''' '''
def __init__(self, save_path, callback=None): def __init__(self, save_path, callback=None):
super().__init__() super().__init__()
self.client = httpx.Client(http2=True)
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor() self.processor = DefaultProcessor()
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary. self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML] self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)] self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
self.callback = callback self.callback = callback
self.executor = ThreadPoolExecutor(max_workers=10)
def _checkpath(self, filepath): def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath)) splitter = os.path.splitext(os.path.basename(filepath))
@@ -80,7 +82,7 @@ class HTMLArchiver(ChatProcessor):
save_path : str : save_path : str :
Actual save path of file. Actual save path of file.
total_lines : int : total_lines : int :
count of total lines written to the file. Count of total lines written to the file.
""" """
if chat_components is None or len(chat_components) == 0: if chat_components is None or len(chat_components) == 0:
return return
@@ -118,9 +120,9 @@ class HTMLArchiver(ChatProcessor):
err = None err = None
for _ in range(5): for _ in range(5):
try: try:
resp = httpx.get(url, timeout=30) resp = self.client.get(url, timeout=30)
break break
except (NetworkError, ReadTimeout) as e: except httpx.HTTPError as e:
print("Network Error. retrying...") print("Network Error. retrying...")
err = e err = e
time.sleep(3) time.sleep(3)
@@ -132,7 +134,7 @@ class HTMLArchiver(ChatProcessor):
def _set_emoji_table(self, item: dict): def _set_emoji_table(self, item: dict):
emoji_id = item['id'] emoji_id = item['id']
if emoji_id not in self.emoji_table: if emoji_id not in self.emoji_table:
self.emoji_table.setdefault(emoji_id, self._encode_img(item['url'])) self.emoji_table.setdefault(emoji_id, self.executor.submit(self._encode_img, item['url']))
return emoji_id return emoji_id
def _stylecode(self, name, code, width, height): def _stylecode(self, name, code, width, height):
@@ -143,11 +145,12 @@ class HTMLArchiver(ChatProcessor):
def _create_styles(self): def _create_styles(self):
return '\n'.join(('<style type="text/css">', return '\n'.join(('<style type="text/css">',
TABLE_CSS, TABLE_CSS,
'\n'.join(self._stylecode(key, self.emoji_table[key], 24, 24) '\n'.join(self._stylecode(key, self.emoji_table[key].result(), 24, 24)
for key in self.emoji_table.keys()), for key in self.emoji_table.keys()),
'</style>\n')) '</style>\n'))
def finalize(self): def finalize(self):
self.executor.shutdown()
self.header.extend([self._create_styles(), '</head>\n']) self.header.extend([self._create_styles(), '</head>\n'])
self.body.extend(['</table>\n</body>\n</html>']) self.body.extend(['</table>\n</body>\n</html>'])
with open(self.save_path, mode='a', encoding='utf-8') as f: with open(self.save_path, mode='a', encoding='utf-8') as f:

View File

@@ -9,7 +9,6 @@ from ... import config
from ... paramgen import arcparam from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from httpx import NetworkError, TimeoutException, ConnectError
from json import JSONDecodeError from json import JSONDecodeError
from urllib.parse import quote from urllib.parse import quote
@@ -81,7 +80,7 @@ def ready_blocks(video_id, duration, div, callback):
break break
except JSONDecodeError: except JSONDecodeError:
await asyncio.sleep(3) await asyncio.sleep(3)
except (NetworkError, TimeoutException, ConnectError) as e: except httpx.HTTPError as e:
err = e err = e
await asyncio.sleep(3) await asyncio.sleep(3)
else: else:
@@ -137,7 +136,7 @@ def fetch_patch(callback, blocks, video_id):
break break
except JSONDecodeError: except JSONDecodeError:
await asyncio.sleep(3) await asyncio.sleep(3)
except (NetworkError, TimeoutException, ConnectError) as e: except httpx.HTTPError as e:
err = e err = e
await asyncio.sleep(3) await asyncio.sleep(3)
except socket.error as error: except socket.error as error:

View File

@@ -2,15 +2,14 @@ import httpx
import json import json
import re import re
import time import time
from httpx import ConnectError, NetworkError, TimeoutException
from .. import config from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})") pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
pattern2 = re.compile(r"yt\.setConfig\((\{[\s\S]*?\})\);")
item_channel_id = [ item_channel_id = [
"videoDetails", "videoDetails",
@@ -32,6 +31,10 @@ item_response = [
"embedded_player_response" "embedded_player_response"
] ]
item_response2 = [
"PLAYER_VARS",
"embedded_player_response"
]
item_author_image = [ item_author_image = [
"videoDetails", "videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer", "embeddedPlayerOverlayVideoDetailsRenderer",
@@ -83,6 +86,8 @@ class VideoInfo:
def __init__(self, video_id): def __init__(self, video_id):
self.video_id = extract_video_id(video_id) self.video_id = extract_video_id(video_id)
self.client = httpx.Client(http2=True)
self.new_pattern_text = False
err = None err = None
for _ in range(3): for _ in range(3):
try: try:
@@ -90,7 +95,6 @@ class VideoInfo:
self._parse(text) self._parse(text)
break break
except (InvalidVideoIdException, UnknownConnectionError) as e: except (InvalidVideoIdException, UnknownConnectionError) as e:
print(str(e))
raise e raise e
except Exception as e: except Exception as e:
err = e err = e
@@ -104,10 +108,10 @@ class VideoInfo:
err = None err = None
for _ in range(3): for _ in range(3):
try: try:
resp = httpx.get(url, headers=headers) resp = self.client.get(url, headers=headers)
resp.raise_for_status() resp.raise_for_status()
break break
except (ConnectError, NetworkError, TimeoutException) as e: except httpx.HTTPError as e:
err = e err = e
time.sleep(3) time.sleep(3)
else: else:
@@ -118,12 +122,25 @@ class VideoInfo:
def _parse(self, text): def _parse(self, text):
result = re.search(pattern, text) result = re.search(pattern, text)
if result is None: if result is None:
raise PatternUnmatchError(doc=text) result = re.search(pattern2, text)
if result is None:
raise PatternUnmatchError(doc=text)
else:
self.new_pattern_text = True
decoder = json.JSONDecoder() decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0] if self.new_pattern_text:
response = self._get_item(res, item_response) res = decoder.raw_decode(result.group(1))[0]
else:
res = decoder.raw_decode(result.group(1)[:-1])[0]
if self.new_pattern_text:
response = self._get_item(res, item_response2)
else:
response = self._get_item(res, item_response)
if response is None: if response is None:
self._check_video_is_private(res.get("args")) if self.new_pattern_text:
self._check_video_is_private(res.get("PLAYER_VARS"))
else:
self._check_video_is_private(res.get("args"))
self._renderer = self._get_item(json.loads(response), item_renderer) self._renderer = self._get_item(json.loads(response), item_renderer)
if self._renderer is None: if self._renderer is None:
raise InvalidVideoIdException( raise InvalidVideoIdException(

View File

@@ -17,12 +17,12 @@ def extract_video_id(url_or_id: str) -> str:
return url_or_id return url_or_id
match = re.search(PATTERN, url_or_id) match = re.search(PATTERN, url_or_id)
if match is None: if match is None:
raise InvalidVideoIdException(url_or_id) raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
try: try:
ret = match.group(4) ret = match.group(4)
except IndexError: except IndexError:
raise InvalidVideoIdException(url_or_id) raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH: if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(url_or_id) raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret return ret

View File

@@ -17,7 +17,6 @@ def test_textmessage(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
assert ret.chattype == "textMessage"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
@@ -47,7 +46,6 @@ def test_textmessage_replay_member(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
assert ret.chattype == "textMessage"
assert ret.type == "textMessage" assert ret.type == "textMessage"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
@@ -80,8 +78,6 @@ def test_superchat(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "superChat"
assert ret.type == "superChat" assert ret.type == "superChat"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
@@ -124,8 +120,6 @@ def test_supersticker(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "superSticker"
assert ret.type == "superSticker" assert ret.type == "superSticker"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "" assert ret.message == ""
@@ -167,8 +161,6 @@ def test_sponsor(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "newSponsor"
assert ret.type == "newSponsor" assert ret.type == "newSponsor"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "新規メンバー" assert ret.message == "新規メンバー"
@@ -202,8 +194,6 @@ def test_sponsor_legacy(mocker):
} }
ret = processor.process([data]).items[0] ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "newSponsor"
assert ret.type == "newSponsor" assert ret.type == "newSponsor"
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "新規メンバー / ようこそ、author_name" assert ret.message == "新規メンバー / ようこそ、author_name"

View File

@@ -1,6 +1,6 @@
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pytchat.tool.videoinfo import VideoInfo from pytchat.tool.videoinfo import VideoInfo
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError from pytchat.exceptions import InvalidVideoIdException
def _open_file(path): def _open_file(path):
@@ -13,7 +13,7 @@ def _set_test_data(filepath, mocker):
response_mock = mocker.Mock() response_mock = mocker.Mock()
response_mock.status_code = 200 response_mock.status_code = 200
response_mock.text = _text response_mock.text = _text
mocker.patch('httpx.get').return_value = response_mock mocker.patch('httpx.Client.get').return_value = response_mock
def test_archived_page(mocker): def test_archived_page(mocker):
@@ -85,7 +85,7 @@ def test_pattern_unmatch(mocker):
try: try:
_ = VideoInfo('__test_id__') _ = VideoInfo('__test_id__')
assert False assert False
except PatternUnmatchError: except JSONDecodeError:
assert True assert True