diff --git a/README.md b/README.md index bc04b23..2ae6c59 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,11 @@ Structure of each item which got from items() function. str emojis are represented by ":(shortcut text):" + + messageEx + str + list of message texts and emoji URLs. + timestamp int @@ -149,7 +154,7 @@ Structure of each item which got from items() function. timestampText str - elapsed time. (ex. "1:02:27") + elapsed time. (ex. "1:02:27") *Replay Only. amountValue @@ -193,7 +198,7 @@ Structure of author object. channelId str - + *chatter's channel ID. NOT broadcasting video's channel ID. channelUrl diff --git a/pytchat/__init__.py b/pytchat/__init__.py index 797adcf..706caf6 100644 --- a/pytchat/__init__.py +++ b/pytchat/__init__.py @@ -2,7 +2,7 @@ pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup. """ __copyright__ = 'Copyright (C) 2019 taizan-hokuto' -__version__ = '0.0.3.1' +__version__ = '0.0.3.3' __license__ = 'MIT' __author__ = 'taizan-hokuto' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' diff --git a/pytchat/core_async/livechat.py b/pytchat/core_async/livechat.py index 15c757c..77fbc3c 100644 --- a/pytchat/core_async/livechat.py +++ b/pytchat/core_async/livechat.py @@ -3,7 +3,6 @@ import datetime import json import random import signal -import threading import time import traceback import urllib.parse @@ -123,7 +122,7 @@ class LiveChatAsync: async def _startlisten(self): """最初のcontinuationパラメータを取得し、 - _listenループを開始する + _listenループのタスクを作成し開始する """ initial_continuation = await self._get_initial_continuation() if initial_continuation is None: @@ -286,13 +285,4 @@ class LiveChatAsync: logger.debug(f"残っているタスクを終了しています") await asyncio.gather(*tasks,return_exceptions=True) loop = asyncio.get_event_loop() - loop.stop() - - - - - - - - - + loop.stop() \ No newline at end of file diff --git a/pytchat/core_async/replaychat.py b/pytchat/core_async/replaychat.py index e28abb7..80169d8 100644 --- a/pytchat/core_async/replaychat.py +++ b/pytchat/core_async/replaychat.py @@ -21,14 +21,19 @@ logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE) MAX_RETRY = 10 headers = config.headers + + class ReplayChatAsync: - ''' aiohttpを利用してYouTubeのライブ配信のチャットデータを取得する + '''asyncio(aiohttp)を利用してYouTubeのチャットデータを取得する。 Parameter --------- video_id : str 動画ID + seektime : int + リプレイするチャットデータの開始時間(秒) + processor : ChatProcessor チャットデータを加工するオブジェクト @@ -46,6 +51,9 @@ class ReplayChatAsync: done_callback : func listener終了時に呼び出すコールバック。 + exception_handler : func + 例外を処理する関数 + direct_mode : bool Trueの場合、bufferを使わずにcallbackを呼ぶ。 Trueの場合、callbackの設定が必須 @@ -53,26 +61,23 @@ class ReplayChatAsync: Attributes --------- - _executor : ThreadPoolExecutor - チャットデータ取得ループ(_listen)用のスレッド - _is_alive : bool - チャット取得を終了したか + チャット取得を停止するためのフラグ ''' _setup_finished = False def __init__(self, video_id, - seektime =0, + seektime = 0, processor = DefaultProcessor(), - buffer = Buffer(maxsize = 20), + buffer = None, interruptable = True, callback = None, done_callback = None, exception_handler = None, direct_mode = False): self.video_id = video_id - self.seektime= seektime + self.seektime = seektime self.processor = processor self._buffer = buffer self._callback = callback @@ -151,8 +156,8 @@ class ReplayChatAsync: async def _listen(self, continuation): ''' continuationに紐付いたチャットデータを取得し - にチャットデータを格納、 - 次のcontinuaitonを取得してループする + Bufferにチャットデータを格納、 + 次のcontinuaitonを取得してループする。 Parameter --------- @@ -163,10 +168,10 @@ class ReplayChatAsync: async with aiohttp.ClientSession() as session: while(continuation and self._is_alive): if self._pauser.empty(): - #pauseが呼ばれて_pauserが空状態のときは一時停止する + #pause await self._pauser.get() - #resumeが呼ばれて_pauserにitemが入ったら再開する - #直後に_pauserにitem(None)を入れてブロックを防ぐ + #resume + #prohibit from blocking by putting None into _pauser. self._pauser.put_nowait(None) livechat_json = (await self._get_livechat_json(continuation, session, headers) @@ -186,11 +191,10 @@ class ReplayChatAsync: else: await self._buffer.put(chat_component) diff_time = timeout - (time.time()-time_mark) - if diff_time < 0 : diff_time=0 await asyncio.sleep(diff_time) continuation = metadata.get('continuation') except ChatParseException as e: - logger.error(f"{str(e)}(動画ID:\"{self.video_id}\")") + logger.info(f"{str(e)}(video_id:\"{self.video_id}\")") return except (TypeError , json.JSONDecodeError) : logger.error(f"{traceback.format_exc(limit = -1)}") diff --git a/pytchat/core_multithread/livechat.py b/pytchat/core_multithread/livechat.py index c47cae8..9d70b00 100644 --- a/pytchat/core_multithread/livechat.py +++ b/pytchat/core_multithread/livechat.py @@ -57,7 +57,7 @@ class LiveChat: チャットデータ取得ループ(_listen)用のスレッド _is_alive : bool - チャット取得を終了したか + チャット取得を停止するためのフラグ ''' _setup_finished = False @@ -142,7 +142,7 @@ class LiveChat: def _listen(self, continuation): ''' continuationに紐付いたチャットデータを取得し - にチャットデータを格納、 + BUfferにチャットデータを格納、 次のcontinuaitonを取得してループする Parameter @@ -157,7 +157,6 @@ class LiveChat: self._get_livechat_json(continuation, session, headers) ) metadata, chatdata = self._parser.parse( livechat_json ) - #チャットデータを含むコンポーネントを組み立ててbufferに投入する timeout = metadata['timeoutMs']/1000 chat_component = { "video_id" : self.video_id, @@ -171,16 +170,12 @@ class LiveChat: ) else: self._buffer.put(chat_component) - #次のchatを取得するまでsleepする diff_time = timeout - (time.time()-time_mark) if diff_time < 0 : diff_time=0 time.sleep(diff_time) - #次のチャットデータのcontinuationパラメータを取り出す。 continuation = metadata.get('continuation') - - #whileループ先頭に戻る except ChatParseException as e: - logger.error(f"{str(e)}(動画ID:\"{self.video_id}\")") + logger.info(f"{str(e)}(video_id:\"{self.video_id}\")") return except (TypeError , json.JSONDecodeError) : logger.error(f"{traceback.format_exc(limit = -1)}") diff --git a/pytchat/core_multithread/replaychat.py b/pytchat/core_multithread/replaychat.py index f8902b1..a72cf98 100644 --- a/pytchat/core_multithread/replaychat.py +++ b/pytchat/core_multithread/replaychat.py @@ -30,6 +30,9 @@ class ReplayChat: video_id : str 動画ID + seektime : int + リプレイするチャットデータの開始時間(秒) + processor : ChatProcessor チャットデータを加工するオブジェクト @@ -65,7 +68,7 @@ class ReplayChat: #チャット監視中のListenerのリスト _listeners= [] def __init__(self, video_id, - seektime =0, + seektime = 0, processor = DefaultProcessor(), buffer = Buffer(maxsize = 20), interruptable = True, @@ -74,7 +77,7 @@ class ReplayChat: direct_mode = False ): self.video_id = video_id - self.seektime= seektime + self.seektime = seektime self.processor = processor self._buffer = buffer self._callback = callback @@ -159,16 +162,15 @@ class ReplayChat: with requests.Session() as session: while(continuation and self._is_alive): if self._pauser.empty(): - #pauseが呼ばれて_pauserが空状態のときは一時停止する + #pause self._pauser.get() - #resumeが呼ばれて_pauserにitemが入ったら再開する - #直後に_pauserにitem(None)を入れてブロックを防ぐ + #resume + #prohibit from blocking by putting None into _pauser. self._pauser.put_nowait(None) livechat_json = ( self._get_livechat_json(continuation, session, headers) ) metadata, chatdata = self._parser.parse( livechat_json ) - #チャットデータを含むコンポーネントを組み立ててbufferに投入する timeout = metadata['timeoutMs']/1000 chat_component = { "video_id" : self.video_id, diff --git a/pytchat/mylogger.py b/pytchat/mylogger.py index 4640307..301a350 100644 --- a/pytchat/mylogger.py +++ b/pytchat/mylogger.py @@ -17,7 +17,7 @@ def get_logger(modname,mode=logging.DEBUG): logger.addHandler(handler1) #create handler2 for recording log file if mode <= logging.DEBUG: - handler2 = logging.FileHandler(filename="log.txt") + handler2 = logging.FileHandler(filename="log.txt", encoding='utf-8') handler2.setLevel(logging.ERROR) handler2.setFormatter(my_formatter) diff --git a/pytchat/parser/replay.py b/pytchat/parser/replay.py index bec801d..c120fe3 100644 --- a/pytchat/parser/replay.py +++ b/pytchat/parser/replay.py @@ -36,9 +36,7 @@ class Parser: raise NoContentsException('チャットデータを取得できませんでした。') interval = self.get_interval(actions) metadata.setdefault("timeoutMs",interval) - chatdata = [] - for action in actions: - chatdata.append(action["replayChatItemAction"]["actions"][0]) + chatdata = [action["replayChatItemAction"]["actions"][0] for action in actions] return metadata, chatdata def get_interval(self, actions: list): diff --git a/pytchat/processors/default/processor.py b/pytchat/processors/default/processor.py index 7806a6d..123157a 100644 --- a/pytchat/processors/default/processor.py +++ b/pytchat/processors/default/processor.py @@ -4,7 +4,9 @@ from .renderer.textmessage import LiveChatTextMessageRenderer from .renderer.paidmessage import LiveChatPaidMessageRenderer from .renderer.paidsticker import LiveChatPaidStickerRenderer from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer - +from ... import config +from ... import mylogger +logger = mylogger.get_logger(__name__,mode=config.LOGGER_MODE) class Chatdata: def __init__(self,chatlist:list, timeout:float): @@ -40,32 +42,31 @@ class DefaultProcessor: if action.get('addChatItemAction') is None: continue if action['addChatItemAction'].get('item') is None: continue - chat = self.parse(action) + chat = self._parse(action) if chat: chatlist.append(chat) return Chatdata(chatlist, float(timeout)) - def parse(self, sitem): + def _parse(self, sitem): action = sitem.get("addChatItemAction") if action: item = action.get("item") if item is None: return None try: - renderer = self.get_renderer(item) + renderer = self._get_renderer(item) if renderer == None: return None renderer.get_snippet() renderer.get_authordetails() except (KeyError,TypeError,AttributeError) as e: - print(f"------{str(type(e))}-{str(e)}----------") - print(sitem) + logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}") return None return renderer - def get_renderer(self, item): + def _get_renderer(self, item): if item.get("liveChatTextMessageRenderer"): renderer = LiveChatTextMessageRenderer(item) elif item.get("liveChatPaidMessageRenderer"): diff --git a/pytchat/processors/default/renderer/base.py b/pytchat/processors/default/renderer/base.py index 507a949..61e7d4c 100644 --- a/pytchat/processors/default/renderer/base.py +++ b/pytchat/processors/default/renderer/base.py @@ -20,6 +20,7 @@ class BaseRenderer: self.timestampText = "" self.datetime = self.get_datetime(timestampUsec) self.message = self.get_message(self.renderer) + self.messageEx = self.get_message_ex(self.renderer) self.id = self.renderer.get('id') self.amountValue= 0.0 self.amountString = "" @@ -54,6 +55,19 @@ class BaseRenderer: message += r.get('text','') return message + def get_message_ex(self,renderer): + message = [] + if renderer.get("message"): + runs=renderer["message"].get("runs") + if runs: + for r in runs: + if r: + if r.get('emoji'): + message.append(r['emoji']['image']['thumbnails'][1].get('url')) + else: + message.append(r.get('text','')) + return message + def get_badges(self,renderer): isVerified = False isChatOwner = False