Compare commits
60 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
864ccddfd7 | ||
|
|
339df69e36 | ||
|
|
76a5b0cd18 | ||
|
|
be0ab2431b | ||
|
|
2edb60c592 | ||
|
|
2c6c3a1ca3 | ||
|
|
4be540793d | ||
|
|
08b86fe596 | ||
|
|
157f3b9952 | ||
|
|
8f3ca2662a | ||
|
|
c4b015861c | ||
|
|
3aa413d59e | ||
|
|
03ba285a16 | ||
|
|
5fe0ee5aa8 | ||
|
|
4e829a25d4 | ||
|
|
15132a9bb8 | ||
|
|
64ace9dad6 | ||
|
|
9a2e96d3a0 | ||
|
|
a3695a59b8 | ||
|
|
bc8655ed62 | ||
|
|
3bdc465740 | ||
|
|
235d6b7212 | ||
|
|
9f0754da57 | ||
|
|
306b0a4564 | ||
|
|
1c49387f1a | ||
|
|
300d96e56c | ||
|
|
0e301f48a8 | ||
|
|
a790ab13a9 | ||
|
|
0456300d19 | ||
|
|
2ef1e7028f | ||
|
|
9413c4a186 | ||
|
|
8a8cef399f | ||
|
|
3bcad12cf6 | ||
|
|
4eb18279fe | ||
|
|
e9ed564e1b | ||
|
|
95f975c93d | ||
|
|
8012e1d191 | ||
|
|
f9480ea1eb | ||
|
|
404727c49c | ||
|
|
6b924a88ef | ||
|
|
56294d6a67 | ||
|
|
283443e374 | ||
|
|
89b51c420f | ||
|
|
96474f10c6 | ||
|
|
5f78a99507 | ||
|
|
78373bf45c | ||
|
|
3e11deed8f | ||
|
|
6daa375adf | ||
|
|
497d84015e | ||
|
|
a90bda674d | ||
|
|
48543b7866 | ||
|
|
5d3c7b5abd | ||
|
|
8df7062873 | ||
|
|
b788f692ad | ||
|
|
713215f1d7 | ||
|
|
f16ef60f11 | ||
|
|
9bbdb6c4de | ||
|
|
2200abf204 | ||
|
|
3ed0cb2c35 | ||
|
|
5fa4d051ee |
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2020 taizan-hokuto
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.1.0'
|
__version__ = '0.2.0'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -1,11 +1,17 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import signal
|
||||||
|
from json.decoder import JSONDecodeError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pytchat.util.extract_video_id import extract_video_id
|
|
||||||
from .arguments import Arguments
|
from .arguments import Arguments
|
||||||
from .. exceptions import InvalidVideoIdException, NoContents
|
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
|
||||||
from .. processors.html_archiver import HTMLArchiver
|
from .. processors.html_archiver import HTMLArchiver
|
||||||
from .. tool.extract.extractor import Extractor
|
from .. tool.extract.extractor import Extractor
|
||||||
from .. tool.videoinfo import VideoInfo
|
from .. tool.videoinfo import VideoInfo
|
||||||
|
from .. util.extract_video_id import extract_video_id
|
||||||
|
from .. import util
|
||||||
from .. import __version__
|
from .. import __version__
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@@ -20,47 +26,101 @@ https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
|||||||
def main():
|
def main():
|
||||||
# Arguments
|
# Arguments
|
||||||
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||||
# parser.add_argument('VideoID_or_URL', type=str, default='__NONE__',nargs='?',
|
|
||||||
# help='Video ID, or URL that includes id.\n'
|
|
||||||
# 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
|
||||||
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
|
parser.add_argument('-v', f'--{Arguments.Name.VIDEO_IDS}', type=str,
|
||||||
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by separating them with commas without spaces.\n'
|
help='Video ID (or URL that includes Video ID). You can specify multiple video IDs by '
|
||||||
|
'separating them with commas without spaces.\n'
|
||||||
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||||
help='Output directory (end with "/"). default="./"', default='./')
|
help='Output directory (end with "/"). default="./"', default='./')
|
||||||
|
parser.add_argument(f'--{Arguments.Name.PBAR}', action='store_true',
|
||||||
|
help='Display rich progress bar')
|
||||||
|
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
|
||||||
|
help='Save error data when error occurs(".dat" file)')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Show version')
|
help='Show version')
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
|
|
||||||
|
if Arguments().pbar:
|
||||||
|
from .progressbar_rich import ProgressBar
|
||||||
|
else:
|
||||||
|
from .progressbar_simple import ProgressBar
|
||||||
if Arguments().print_version:
|
if Arguments().print_version:
|
||||||
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
||||||
return
|
return
|
||||||
|
|
||||||
# Extractor
|
# Extractor
|
||||||
if Arguments().video_ids:
|
if not Arguments().video_ids:
|
||||||
for video_id in Arguments().video_ids:
|
parser.print_help()
|
||||||
if '[' in video_id:
|
|
||||||
video_id = video_id.replace('[', '').replace(']', '')
|
|
||||||
try:
|
|
||||||
info = VideoInfo(video_id)
|
|
||||||
print(f"Extracting...\n"
|
|
||||||
f" video_id: {video_id}\n"
|
|
||||||
f" channel: {info.get_channel_name()}\n"
|
|
||||||
f" title: {info.get_title()}")
|
|
||||||
path = Path(Arguments().output + video_id + '.html')
|
|
||||||
print(f" output path: {path.resolve()}")
|
|
||||||
Extractor(video_id,
|
|
||||||
processor=HTMLArchiver(
|
|
||||||
Arguments().output + video_id + '.html'),
|
|
||||||
callback=_disp_progress
|
|
||||||
).extract()
|
|
||||||
print("\nExtraction end.\n")
|
|
||||||
except InvalidVideoIdException:
|
|
||||||
print("Invalid Video ID or URL:", video_id)
|
|
||||||
except (TypeError, NoContents) as e:
|
|
||||||
print(e)
|
|
||||||
return
|
return
|
||||||
parser.print_help()
|
for video_id in Arguments().video_ids:
|
||||||
|
if '[' in video_id:
|
||||||
|
video_id = video_id.replace('[', '').replace(']', '')
|
||||||
|
try:
|
||||||
|
video_id = extract_video_id(video_id)
|
||||||
|
if os.path.exists(Arguments().output):
|
||||||
|
path = Path(Arguments().output + video_id + '.html')
|
||||||
|
else:
|
||||||
|
raise FileNotFoundError
|
||||||
|
info = VideoInfo(video_id)
|
||||||
|
print(f"Extracting...\n"
|
||||||
|
f" video_id: {video_id}\n"
|
||||||
|
f" channel: {info.get_channel_name()}\n"
|
||||||
|
f" title: {info.get_title()}")
|
||||||
|
|
||||||
|
print(f" output path: {path.resolve()}")
|
||||||
|
duration = info.get_duration()
|
||||||
|
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
||||||
|
ex = Extractor(video_id,
|
||||||
|
callback=pbar._disp,
|
||||||
|
div=10)
|
||||||
|
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
||||||
|
data = ex.extract()
|
||||||
|
if data == []:
|
||||||
|
return False
|
||||||
|
if Arguments().pbar:
|
||||||
|
pbar.reset("#", "=", total=len(data), status="Rendering ")
|
||||||
|
else:
|
||||||
|
pbar.reset("=", "", total=len(data), status="Rendering ")
|
||||||
|
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
|
||||||
|
processor.process(
|
||||||
|
[{'video_id': None,
|
||||||
|
'timeout': 1,
|
||||||
|
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
||||||
|
)
|
||||||
|
processor.finalize()
|
||||||
|
if Arguments().pbar:
|
||||||
|
pbar.reset('#', '#', status='Completed ')
|
||||||
|
pbar.close()
|
||||||
|
else:
|
||||||
|
pbar.close()
|
||||||
|
print("\nCompleted")
|
||||||
|
|
||||||
|
print()
|
||||||
|
if pbar.is_cancelled():
|
||||||
|
print("\nThe extraction process has been discontinued.\n")
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
except InvalidVideoIdException:
|
||||||
|
print("Invalid Video ID or URL:", video_id)
|
||||||
|
except NoContents as e:
|
||||||
|
print(e)
|
||||||
|
except FileNotFoundError:
|
||||||
|
print("The specified directory does not exist.:{}".format(Arguments().output))
|
||||||
|
except JSONDecodeError as e:
|
||||||
|
print(e.msg)
|
||||||
|
print("Cannot parse video information.:{}".format(video_id))
|
||||||
|
if Arguments().save_error_data:
|
||||||
|
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
||||||
|
except PatternUnmatchError as e:
|
||||||
|
print(e.msg)
|
||||||
|
print("Cannot parse video information.:{}".format(video_id))
|
||||||
|
if Arguments().save_error_data:
|
||||||
|
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def _disp_progress(a, b):
|
def cancel(ex, pbar):
|
||||||
print('.', end="", flush=True)
|
ex.cancel()
|
||||||
|
pbar.cancel()
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ class Arguments(metaclass=Singleton):
|
|||||||
VERSION: str = 'version'
|
VERSION: str = 'version'
|
||||||
OUTPUT: str = 'output_dir'
|
OUTPUT: str = 'output_dir'
|
||||||
VIDEO_IDS: str = 'video_id'
|
VIDEO_IDS: str = 'video_id'
|
||||||
|
SAVE_ERROR_DATA: bool = 'save_error_data'
|
||||||
|
PBAR: bool ='pbar'
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||||
@@ -34,10 +36,9 @@ class Arguments(metaclass=Singleton):
|
|||||||
self.print_version: bool = arguments[Arguments.Name.VERSION]
|
self.print_version: bool = arguments[Arguments.Name.VERSION]
|
||||||
self.output: str = arguments[Arguments.Name.OUTPUT]
|
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||||
self.video_ids: List[int] = []
|
self.video_ids: List[int] = []
|
||||||
|
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
|
||||||
|
self.pbar: bool = arguments[Arguments.Name.PBAR]
|
||||||
# Videos
|
# Videos
|
||||||
if arguments[Arguments.Name.VIDEO_IDS]:
|
if arguments[Arguments.Name.VIDEO_IDS]:
|
||||||
self.video_ids = [video_id
|
self.video_ids = [video_id
|
||||||
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
|
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
52
pytchat/cli/progressbar_rich.py
Normal file
52
pytchat/cli/progressbar_rich.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
'''
|
||||||
|
This code for this progress bar is based on
|
||||||
|
vladignatyev/progress.py
|
||||||
|
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
||||||
|
(MIT License)
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBar:
|
||||||
|
def __init__(self, total, status):
|
||||||
|
self._bar_len = 60
|
||||||
|
self._cancelled = False
|
||||||
|
self.reset(total=total, status=status)
|
||||||
|
self._blinker = 0
|
||||||
|
|
||||||
|
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
||||||
|
self._symbol_done = symbol_done
|
||||||
|
self._symbol_space = symbol_space
|
||||||
|
self._total = total
|
||||||
|
self._status = status
|
||||||
|
self._count = 0
|
||||||
|
|
||||||
|
def _disp(self, _, fetched):
|
||||||
|
self._progress(fetched, self._total)
|
||||||
|
|
||||||
|
def _progress(self, fillin, total):
|
||||||
|
if total == 0 or self._cancelled:
|
||||||
|
return
|
||||||
|
self._count += fillin
|
||||||
|
filled_len = int(round(self._bar_len * self._count / float(total)))
|
||||||
|
percents = round(100.0 * self._count / float(total), 1)
|
||||||
|
if percents > 100:
|
||||||
|
percents = 100.0
|
||||||
|
if filled_len > self._bar_len:
|
||||||
|
filled_len = self._bar_len
|
||||||
|
|
||||||
|
bar = self._symbol_done * filled_len + \
|
||||||
|
self._symbol_space * (self._bar_len - filled_len)
|
||||||
|
sys.stdout.write(' [%s] %s%s ...%s \r' % (bar, percents, '%', self._status))
|
||||||
|
sys.stdout.flush()
|
||||||
|
self._blinker += 1
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
if not self._cancelled:
|
||||||
|
self._progress(self._total, self._total)
|
||||||
|
|
||||||
|
def cancel(self):
|
||||||
|
self._cancelled = True
|
||||||
|
|
||||||
|
def is_cancelled(self):
|
||||||
|
return self._cancelled
|
||||||
49
pytchat/cli/progressbar_simple.py
Normal file
49
pytchat/cli/progressbar_simple.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
'''
|
||||||
|
This code for this progress bar is based on
|
||||||
|
vladignatyev/progress.py
|
||||||
|
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
||||||
|
(MIT License)
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBar:
|
||||||
|
def __init__(self, total, status):
|
||||||
|
self._bar_len = 60
|
||||||
|
self._cancelled = False
|
||||||
|
print(''.join([' ' * 10, '|', '-' * (self._bar_len), '|']), end="")
|
||||||
|
self.reset(total=total, status=status)
|
||||||
|
|
||||||
|
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
||||||
|
self._symbol_done = symbol_done
|
||||||
|
self._symbol_space = symbol_space
|
||||||
|
self._total = total
|
||||||
|
self._status = status
|
||||||
|
self._old_len = 0
|
||||||
|
self._count = 0
|
||||||
|
print()
|
||||||
|
print(f'{status:<11}', end='')
|
||||||
|
|
||||||
|
def _disp(self, _, fetched):
|
||||||
|
self._progress(fetched, self._total)
|
||||||
|
|
||||||
|
def _progress(self, fillin, total):
|
||||||
|
if total == 0 or self._cancelled:
|
||||||
|
return
|
||||||
|
self._count += fillin
|
||||||
|
filled_len = int(round(self._bar_len * self._count / float(total)))
|
||||||
|
if filled_len > self._bar_len:
|
||||||
|
filled_len = self._bar_len
|
||||||
|
print((filled_len - self._old_len) * self._symbol_done, end="")
|
||||||
|
sys.stdout.flush()
|
||||||
|
self._old_len = filled_len
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
if not self._cancelled:
|
||||||
|
self._progress(self._total, self._total)
|
||||||
|
|
||||||
|
def cancel(self):
|
||||||
|
self._cancelled = True
|
||||||
|
|
||||||
|
def is_cancelled(self):
|
||||||
|
return self._cancelled
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
from . import mylogger
|
from . import mylogger
|
||||||
headers = {
|
headers = {
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
|
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def logger(module_name: str, loglevel=None):
|
def logger(module_name: str, loglevel=None):
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
import aiohttp
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import httpx
|
||||||
import json
|
import json
|
||||||
import signal
|
import signal
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from aiohttp.client_exceptions import ClientConnectorError
|
|
||||||
from concurrent.futures import CancelledError
|
|
||||||
from asyncio import Queue
|
from asyncio import Queue
|
||||||
|
from concurrent.futures import CancelledError
|
||||||
from .buffer import Buffer
|
from .buffer import Buffer
|
||||||
from ..parser.live import Parser
|
from ..parser.live import Parser
|
||||||
from .. import config
|
from .. import config
|
||||||
@@ -22,7 +22,7 @@ MAX_RETRY = 10
|
|||||||
|
|
||||||
|
|
||||||
class LiveChatAsync:
|
class LiveChatAsync:
|
||||||
'''asyncio(aiohttp)を利用してYouTubeのライブ配信のチャットデータを取得する。
|
'''asyncioを利用してYouTubeのライブ配信のチャットデータを取得する。
|
||||||
|
|
||||||
Parameter
|
Parameter
|
||||||
---------
|
---------
|
||||||
@@ -161,11 +161,11 @@ class LiveChatAsync:
|
|||||||
parameter for next chat data
|
parameter for next chat data
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with httpx.AsyncClient(http2=True) as client:
|
||||||
while(continuation and self._is_alive):
|
while(continuation and self._is_alive):
|
||||||
continuation = await self._check_pause(continuation)
|
continuation = await self._check_pause(continuation)
|
||||||
contents = await self._get_contents(
|
contents = await self._get_contents(
|
||||||
continuation, session, headers)
|
continuation, client, headers)
|
||||||
metadata, chatdata = self._parser.parse(contents)
|
metadata, chatdata = self._parser.parse(contents)
|
||||||
|
|
||||||
timeout = metadata['timeoutMs'] / 1000
|
timeout = metadata['timeoutMs'] / 1000
|
||||||
@@ -210,7 +210,7 @@ class LiveChatAsync:
|
|||||||
self._video_id, 3, self._topchat_only)
|
self._video_id, 3, self._topchat_only)
|
||||||
return continuation
|
return continuation
|
||||||
|
|
||||||
async def _get_contents(self, continuation, session, headers):
|
async def _get_contents(self, continuation, client, headers):
|
||||||
'''Get 'continuationContents' from livechat json.
|
'''Get 'continuationContents' from livechat json.
|
||||||
If contents is None at first fetching,
|
If contents is None at first fetching,
|
||||||
try to fetch archive chat data.
|
try to fetch archive chat data.
|
||||||
@@ -219,7 +219,7 @@ class LiveChatAsync:
|
|||||||
-------
|
-------
|
||||||
'continuationContents' which includes metadata & chatdata.
|
'continuationContents' which includes metadata & chatdata.
|
||||||
'''
|
'''
|
||||||
livechat_json = await self._get_livechat_json(continuation, session, headers)
|
livechat_json = await self._get_livechat_json(continuation, client, headers)
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
if self._first_fetch:
|
if self._first_fetch:
|
||||||
if contents is None or self._is_replay:
|
if contents is None or self._is_replay:
|
||||||
@@ -229,18 +229,18 @@ class LiveChatAsync:
|
|||||||
continuation = arcparam.getparam(
|
continuation = arcparam.getparam(
|
||||||
self._video_id, self.seektime, self._topchat_only)
|
self._video_id, self.seektime, self._topchat_only)
|
||||||
livechat_json = (await self._get_livechat_json(
|
livechat_json = (await self._get_livechat_json(
|
||||||
continuation, session, headers))
|
continuation, client, headers))
|
||||||
reload_continuation = self._parser.reload_continuation(
|
reload_continuation = self._parser.reload_continuation(
|
||||||
self._parser.get_contents(livechat_json))
|
self._parser.get_contents(livechat_json))
|
||||||
if reload_continuation:
|
if reload_continuation:
|
||||||
livechat_json = (await self._get_livechat_json(
|
livechat_json = (await self._get_livechat_json(
|
||||||
reload_continuation, session, headers))
|
reload_continuation, client, headers))
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
self._is_replay = True
|
self._is_replay = True
|
||||||
self._first_fetch = False
|
self._first_fetch = False
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
async def _get_livechat_json(self, continuation, session, headers):
|
async def _get_livechat_json(self, continuation, client, headers):
|
||||||
'''
|
'''
|
||||||
Get json which includes chat data.
|
Get json which includes chat data.
|
||||||
'''
|
'''
|
||||||
@@ -249,14 +249,13 @@ class LiveChatAsync:
|
|||||||
status_code = 0
|
status_code = 0
|
||||||
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||||
for _ in range(MAX_RETRY + 1):
|
for _ in range(MAX_RETRY + 1):
|
||||||
async with session.get(url, headers=headers) as resp:
|
try:
|
||||||
try:
|
resp = await client.get(url, headers=headers)
|
||||||
text = await resp.text()
|
livechat_json = resp.json()
|
||||||
livechat_json = json.loads(text)
|
break
|
||||||
break
|
except (httpx.HTTPError, json.JSONDecodeError):
|
||||||
except (ClientConnectorError, json.JSONDecodeError):
|
await asyncio.sleep(1)
|
||||||
await asyncio.sleep(1)
|
continue
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
self._logger.error(f"[{self._video_id}]"
|
self._logger.error(f"[{self._video_id}]"
|
||||||
f"Exceeded retry count. status_code={status_code}")
|
f"Exceeded retry count. status_code={status_code}")
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import requests
|
import httpx
|
||||||
import json
|
import json
|
||||||
import signal
|
import signal
|
||||||
import time
|
import time
|
||||||
@@ -153,10 +153,10 @@ class LiveChat:
|
|||||||
parameter for next chat data
|
parameter for next chat data
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
with requests.Session() as session:
|
with httpx.Client(http2=True) as client:
|
||||||
while(continuation and self._is_alive):
|
while(continuation and self._is_alive):
|
||||||
continuation = self._check_pause(continuation)
|
continuation = self._check_pause(continuation)
|
||||||
contents = self._get_contents(continuation, session, headers)
|
contents = self._get_contents(continuation, client, headers)
|
||||||
metadata, chatdata = self._parser.parse(contents)
|
metadata, chatdata = self._parser.parse(contents)
|
||||||
timeout = metadata['timeoutMs'] / 1000
|
timeout = metadata['timeoutMs'] / 1000
|
||||||
chat_component = {
|
chat_component = {
|
||||||
@@ -199,7 +199,7 @@ class LiveChat:
|
|||||||
continuation = liveparam.getparam(self._video_id, 3)
|
continuation = liveparam.getparam(self._video_id, 3)
|
||||||
return continuation
|
return continuation
|
||||||
|
|
||||||
def _get_contents(self, continuation, session, headers):
|
def _get_contents(self, continuation, client, headers):
|
||||||
'''Get 'continuationContents' from livechat json.
|
'''Get 'continuationContents' from livechat json.
|
||||||
If contents is None at first fetching,
|
If contents is None at first fetching,
|
||||||
try to fetch archive chat data.
|
try to fetch archive chat data.
|
||||||
@@ -209,7 +209,7 @@ class LiveChat:
|
|||||||
'continuationContents' which includes metadata & chat data.
|
'continuationContents' which includes metadata & chat data.
|
||||||
'''
|
'''
|
||||||
livechat_json = (
|
livechat_json = (
|
||||||
self._get_livechat_json(continuation, session, headers)
|
self._get_livechat_json(continuation, client, headers)
|
||||||
)
|
)
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
if self._first_fetch:
|
if self._first_fetch:
|
||||||
@@ -219,18 +219,18 @@ class LiveChat:
|
|||||||
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||||
continuation = arcparam.getparam(
|
continuation = arcparam.getparam(
|
||||||
self._video_id, self.seektime, self._topchat_only)
|
self._video_id, self.seektime, self._topchat_only)
|
||||||
livechat_json = (self._get_livechat_json(continuation, session, headers))
|
livechat_json = (self._get_livechat_json(continuation, client, headers))
|
||||||
reload_continuation = self._parser.reload_continuation(
|
reload_continuation = self._parser.reload_continuation(
|
||||||
self._parser.get_contents(livechat_json))
|
self._parser.get_contents(livechat_json))
|
||||||
if reload_continuation:
|
if reload_continuation:
|
||||||
livechat_json = (self._get_livechat_json(
|
livechat_json = (self._get_livechat_json(
|
||||||
reload_continuation, session, headers))
|
reload_continuation, client, headers))
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
self._is_replay = True
|
self._is_replay = True
|
||||||
self._first_fetch = False
|
self._first_fetch = False
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
def _get_livechat_json(self, continuation, session, headers):
|
def _get_livechat_json(self, continuation, client, headers):
|
||||||
'''
|
'''
|
||||||
Get json which includes chat data.
|
Get json which includes chat data.
|
||||||
'''
|
'''
|
||||||
@@ -239,10 +239,9 @@ class LiveChat:
|
|||||||
status_code = 0
|
status_code = 0
|
||||||
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||||
for _ in range(MAX_RETRY + 1):
|
for _ in range(MAX_RETRY + 1):
|
||||||
with session.get(url, headers=headers) as resp:
|
with client:
|
||||||
try:
|
try:
|
||||||
text = resp.text
|
livechat_json = client.get(url, headers=headers).json()
|
||||||
livechat_json = json.loads(text)
|
|
||||||
break
|
break
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|||||||
@@ -62,3 +62,18 @@ class ReceivedUnknownContinuation(ChatParseException):
|
|||||||
|
|
||||||
class FailedExtractContinuation(ChatDataFinished):
|
class FailedExtractContinuation(ChatDataFinished):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class VideoInfoParseError(Exception):
|
||||||
|
'''
|
||||||
|
thrown when failed to parse video info
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
class PatternUnmatchError(VideoInfoParseError):
|
||||||
|
'''
|
||||||
|
thrown when failed to parse video info with unmatched pattern
|
||||||
|
'''
|
||||||
|
def __init__(self, doc):
|
||||||
|
self.msg = "PatternUnmatchError"
|
||||||
|
self.doc = doc
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
|
|||||||
self.amountString = amountDisplayString
|
self.amountString = amountDisplayString
|
||||||
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
|
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
|
||||||
symbol) else symbol
|
symbol) else symbol
|
||||||
self.bgColor = self.renderer.get("moneyChipBackgroundColor", 0)
|
self.bgColor = self.renderer.get("backgroundColor", 0)
|
||||||
self.sticker = "".join(("https:",
|
self.sticker = "".join(("https:",
|
||||||
self.renderer["sticker"]["thumbnails"][0]["url"]))
|
self.renderer["sticker"]["thumbnails"][0]["url"]))
|
||||||
self.colors = self.get_colors()
|
self.colors = self.get_colors()
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import requests
|
import httpx
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
from .chat_processor import ChatProcessor
|
from .chat_processor import ChatProcessor
|
||||||
from .default.processor import DefaultProcessor
|
from .default.processor import DefaultProcessor
|
||||||
@@ -43,20 +43,21 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
'''
|
'''
|
||||||
HTMLArchiver saves chat data as HTML table format.
|
HTMLArchiver saves chat data as HTML table format.
|
||||||
'''
|
'''
|
||||||
def __init__(self, save_path):
|
def __init__(self, save_path, callback=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.save_path = self._checkpath(save_path)
|
self.save_path = self._checkpath(save_path)
|
||||||
self.processor = DefaultProcessor()
|
self.processor = DefaultProcessor()
|
||||||
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||||
self.header = [HEADER_HTML]
|
self.header = [HEADER_HTML]
|
||||||
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
||||||
|
self.callback = callback
|
||||||
|
|
||||||
def _checkpath(self, filepath):
|
def _checkpath(self, filepath):
|
||||||
splitter = os.path.splitext(os.path.basename(filepath))
|
splitter = os.path.splitext(os.path.basename(filepath))
|
||||||
body = splitter[0]
|
body = splitter[0]
|
||||||
extention = splitter[1]
|
extention = splitter[1]
|
||||||
newpath = filepath
|
newpath = filepath
|
||||||
counter = 0
|
counter = 1
|
||||||
while os.path.exists(newpath):
|
while os.path.exists(newpath):
|
||||||
match = re.search(PATTERN, body)
|
match = re.search(PATTERN, body)
|
||||||
if match:
|
if match:
|
||||||
@@ -80,17 +81,20 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
"""
|
"""
|
||||||
if chat_components is None or len(chat_components) == 0:
|
if chat_components is None or len(chat_components) == 0:
|
||||||
return
|
return
|
||||||
self.body.extend(
|
for c in self.processor.process(chat_components).items:
|
||||||
(self._parse_html_line((
|
self.body.extend(
|
||||||
c.datetime,
|
self._parse_html_line((
|
||||||
c.elapsedTime,
|
c.datetime,
|
||||||
c.author.name,
|
c.elapsedTime,
|
||||||
self._parse_message(c.messageEx),
|
c.author.name,
|
||||||
c.amountString,
|
self._parse_message(c.messageEx),
|
||||||
c.author.type,
|
c.amountString,
|
||||||
c.author.channelId)
|
c.author.type,
|
||||||
) for c in self.processor.process(chat_components).items)
|
c.author.channelId)
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
if self.callback:
|
||||||
|
self.callback(None, 1)
|
||||||
|
|
||||||
def _parse_html_line(self, raw_line):
|
def _parse_html_line(self, raw_line):
|
||||||
return ''.join(('<tr>',
|
return ''.join(('<tr>',
|
||||||
@@ -108,7 +112,7 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
for item in message_items)
|
for item in message_items)
|
||||||
|
|
||||||
def _encode_img(self, url):
|
def _encode_img(self, url):
|
||||||
resp = requests.get(url)
|
resp = httpx.get(url)
|
||||||
return standard_b64encode(resp.content).decode()
|
return standard_b64encode(resp.content).decode()
|
||||||
|
|
||||||
def _set_emoji_table(self, item: dict):
|
def _set_emoji_table(self, item: dict):
|
||||||
@@ -131,7 +135,7 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
|
|
||||||
def finalize(self):
|
def finalize(self):
|
||||||
self.header.extend([self._create_styles(), '</head>\n'])
|
self.header.extend([self._create_styles(), '</head>\n'])
|
||||||
self.body.extend(['</table>\n</body>'])
|
self.body.extend(['</table>\n</body>\n</html>'])
|
||||||
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
||||||
f.writelines(self.header)
|
f.writelines(self.header)
|
||||||
f.writelines(self.body)
|
f.writelines(self.body)
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import aiohttp
|
import httpx
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
|
||||||
from . import parser
|
from . import parser
|
||||||
from . block import Block
|
from . block import Block
|
||||||
from . worker import ExtractWorker
|
from . worker import ExtractWorker
|
||||||
@@ -55,7 +54,7 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
async def _get_blocks(video_id, duration, div, callback):
|
async def _get_blocks(video_id, duration, div, callback):
|
||||||
async with aiohttp.ClientSession() as session:
|
async with httpx.AsyncClient(http2=True) as session:
|
||||||
tasks = [_create_block(session, video_id, seektime, callback)
|
tasks = [_create_block(session, video_id, seektime, callback)
|
||||||
for seektime in _split(-1, duration, div)]
|
for seektime in _split(-1, duration, div)]
|
||||||
return await asyncio.gather(*tasks)
|
return await asyncio.gather(*tasks)
|
||||||
@@ -65,9 +64,8 @@ def ready_blocks(video_id, duration, div, callback):
|
|||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
async with session.get(url, headers=headers) as resp:
|
resp = await session.get(url, headers=headers)
|
||||||
text = await resp.text()
|
next_continuation, actions = parser.parse(resp.json())
|
||||||
next_continuation, actions = parser.parse(json.loads(text))
|
|
||||||
break
|
break
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
@@ -106,7 +104,7 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
)
|
)
|
||||||
for block in blocks
|
for block in blocks
|
||||||
]
|
]
|
||||||
async with aiohttp.ClientSession() as session:
|
async with httpx.AsyncClient() as session:
|
||||||
tasks = [worker.run(session) for worker in workers]
|
tasks = [worker.run(session) for worker in workers]
|
||||||
return await asyncio.gather(*tasks)
|
return await asyncio.gather(*tasks)
|
||||||
|
|
||||||
@@ -114,9 +112,8 @@ def fetch_patch(callback, blocks, video_id):
|
|||||||
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
|
||||||
for _ in range(MAX_RETRY_COUNT):
|
for _ in range(MAX_RETRY_COUNT):
|
||||||
try:
|
try:
|
||||||
async with session.get(url, headers=config.headers) as resp:
|
resp = await session.get(url, headers=config.headers)
|
||||||
chat_json = await resp.text()
|
continuation, actions = parser.parse(resp.json())
|
||||||
continuation, actions = parser.parse(json.loads(chat_json))
|
|
||||||
break
|
break
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ class Extractor:
|
|||||||
|
|
||||||
def extract(self):
|
def extract(self):
|
||||||
if self.duration == 0:
|
if self.duration == 0:
|
||||||
print("video is not archived.")
|
print("\nCannot extract chat data:\n The specified video has not yet been archived.")
|
||||||
return []
|
return []
|
||||||
data = self._execute_extract_operations()
|
data = self._execute_extract_operations()
|
||||||
if self.processor is None:
|
if self.processor is None:
|
||||||
|
|||||||
@@ -42,10 +42,14 @@ def get_offset(item):
|
|||||||
|
|
||||||
|
|
||||||
def get_id(item):
|
def get_id(item):
|
||||||
return list((list(item['replayChatItemAction']["actions"][0].values()
|
a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
|
||||||
)[0])['item'].values())[0].get('id')
|
if a:
|
||||||
|
return list(a.values())[0].get('id')
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_type(item):
|
def get_type(item):
|
||||||
return list((list(item['replayChatItemAction']["actions"][0].values()
|
a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
|
||||||
)[0])['item'].keys())[0]
|
if a:
|
||||||
|
return list(a.keys())[0]
|
||||||
|
return None
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from . block import Block
|
from . block import Block
|
||||||
from . patch import fill, split
|
from . patch import fill, split
|
||||||
from ... paramgen import arcparam
|
from ... paramgen import arcparam
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
|
||||||
class ExtractWorker:
|
class ExtractWorker:
|
||||||
@@ -76,7 +77,7 @@ def _search_new_block(worker) -> Block:
|
|||||||
return new_block
|
return new_block
|
||||||
|
|
||||||
|
|
||||||
def _get_undone_block(blocks) -> (int, Block):
|
def _get_undone_block(blocks) -> Tuple[int, Block]:
|
||||||
min_interval_ms = 120000
|
min_interval_ms = 120000
|
||||||
max_remaining = 0
|
max_remaining = 0
|
||||||
undone_block = None
|
undone_block = None
|
||||||
|
|||||||
@@ -1,141 +0,0 @@
|
|||||||
|
|
||||||
import aiohttp
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from . import parser
|
|
||||||
from . block import Block
|
|
||||||
from . worker import ExtractWorker
|
|
||||||
from . patch import Patch
|
|
||||||
from ... import config
|
|
||||||
from ... paramgen import arcparam_mining as arcparam
|
|
||||||
from concurrent.futures import CancelledError
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
headers = config.headers
|
|
||||||
REPLAY_URL = "https://www.youtube.com/live_chat_replay?continuation="
|
|
||||||
INTERVAL = 1
|
|
||||||
def _split(start, end, count, min_interval_sec = 120):
|
|
||||||
"""
|
|
||||||
Split section from `start` to `end` into `count` pieces,
|
|
||||||
and returns the beginning of each piece.
|
|
||||||
The `count` is adjusted so that the length of each piece
|
|
||||||
is no smaller than `min_interval`.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
--------
|
|
||||||
List of the offset of each block's first chat data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not (isinstance(start,int) or isinstance(start,float)) or \
|
|
||||||
not (isinstance(end,int) or isinstance(end,float)):
|
|
||||||
raise ValueError("start/end must be int or float")
|
|
||||||
if not isinstance(count,int):
|
|
||||||
raise ValueError("count must be int")
|
|
||||||
if start>end:
|
|
||||||
raise ValueError("end must be equal to or greater than start.")
|
|
||||||
if count<1:
|
|
||||||
raise ValueError("count must be equal to or greater than 1.")
|
|
||||||
if (end-start)/count < min_interval_sec:
|
|
||||||
count = int((end-start)/min_interval_sec)
|
|
||||||
if count == 0 : count = 1
|
|
||||||
interval= (end-start)/count
|
|
||||||
|
|
||||||
if count == 1:
|
|
||||||
return [start]
|
|
||||||
return sorted( list(set( [int(start + interval*j)
|
|
||||||
for j in range(count) ])))
|
|
||||||
|
|
||||||
def ready_blocks(video_id, duration, div, callback):
|
|
||||||
if div <= 0: raise ValueError
|
|
||||||
|
|
||||||
async def _get_blocks( video_id, duration, div, callback):
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
tasks = [_create_block(session, video_id, seektime, callback)
|
|
||||||
for seektime in _split(0, duration, div)]
|
|
||||||
return await asyncio.gather(*tasks)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def _create_block(session, video_id, seektime, callback):
|
|
||||||
continuation = arcparam.getparam(video_id, seektime = seektime)
|
|
||||||
url=(f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
|
|
||||||
f"{int(seektime*1000)}&hidden=false&pbj=1")
|
|
||||||
async with session.get(url, headers = headers) as resp:
|
|
||||||
chat_json = await resp.text()
|
|
||||||
if chat_json is None:
|
|
||||||
return
|
|
||||||
continuation, actions = parser.parse(json.loads(chat_json)[1])
|
|
||||||
first = seektime
|
|
||||||
seektime += INTERVAL
|
|
||||||
if callback:
|
|
||||||
callback(actions, INTERVAL)
|
|
||||||
return Block(
|
|
||||||
continuation = continuation,
|
|
||||||
chat_data = actions,
|
|
||||||
first = first,
|
|
||||||
last = seektime,
|
|
||||||
seektime = seektime
|
|
||||||
)
|
|
||||||
"""
|
|
||||||
fetch initial blocks.
|
|
||||||
"""
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
blocks = loop.run_until_complete(
|
|
||||||
_get_blocks(video_id, duration, div, callback))
|
|
||||||
return blocks
|
|
||||||
|
|
||||||
def fetch_patch(callback, blocks, video_id):
|
|
||||||
|
|
||||||
async def _allocate_workers():
|
|
||||||
workers = [
|
|
||||||
ExtractWorker(
|
|
||||||
fetch = _fetch, block = block,
|
|
||||||
blocks = blocks, video_id = video_id
|
|
||||||
)
|
|
||||||
for block in blocks
|
|
||||||
]
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
tasks = [worker.run(session) for worker in workers]
|
|
||||||
return await asyncio.gather(*tasks)
|
|
||||||
|
|
||||||
async def _fetch(seektime,session) -> Patch:
|
|
||||||
continuation = arcparam.getparam(video_id, seektime = seektime)
|
|
||||||
url=(f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
|
|
||||||
f"{int(seektime*1000)}&hidden=false&pbj=1")
|
|
||||||
async with session.get(url,headers = config.headers) as resp:
|
|
||||||
chat_json = await resp.text()
|
|
||||||
actions = []
|
|
||||||
try:
|
|
||||||
if chat_json is None:
|
|
||||||
return Patch()
|
|
||||||
continuation, actions = parser.parse(json.loads(chat_json)[1])
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
if callback:
|
|
||||||
callback(actions, INTERVAL)
|
|
||||||
return Patch(chats = actions, continuation = continuation,
|
|
||||||
seektime = seektime, last = seektime)
|
|
||||||
"""
|
|
||||||
allocate workers and assign blocks.
|
|
||||||
"""
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(_allocate_workers())
|
|
||||||
except CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def _shutdown():
|
|
||||||
print("\nshutdown...")
|
|
||||||
tasks = [t for t in asyncio.all_tasks()
|
|
||||||
if t is not asyncio.current_task()]
|
|
||||||
for task in tasks:
|
|
||||||
task.cancel()
|
|
||||||
try:
|
|
||||||
await task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def cancel():
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.create_task(_shutdown())
|
|
||||||
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
from . import parser
|
|
||||||
class Block:
|
|
||||||
"""Block object represents something like a box
|
|
||||||
to join chunk of chatdata.
|
|
||||||
|
|
||||||
Parameter:
|
|
||||||
---------
|
|
||||||
first : int :
|
|
||||||
videoOffsetTimeMs of the first chat_data
|
|
||||||
(chat_data[0])
|
|
||||||
|
|
||||||
last : int :
|
|
||||||
videoOffsetTimeMs of the last chat_data.
|
|
||||||
(chat_data[-1])
|
|
||||||
|
|
||||||
this value increases as fetching chatdata progresses.
|
|
||||||
|
|
||||||
end : int :
|
|
||||||
target videoOffsetTimeMs of last chat data for extract,
|
|
||||||
equals to first videoOffsetTimeMs of next block.
|
|
||||||
when extract worker reaches this offset, stop fetching.
|
|
||||||
|
|
||||||
continuation : str :
|
|
||||||
continuation param of last chat data.
|
|
||||||
|
|
||||||
chat_data : list
|
|
||||||
|
|
||||||
done : bool :
|
|
||||||
whether this block has been fetched.
|
|
||||||
|
|
||||||
remaining : int :
|
|
||||||
remaining data to extract.
|
|
||||||
equals end - last.
|
|
||||||
|
|
||||||
is_last : bool :
|
|
||||||
whether this block is the last one in blocklist.
|
|
||||||
|
|
||||||
during_split : bool :
|
|
||||||
whether this block is in the process of during_split.
|
|
||||||
while True, this block is excluded from duplicate split procedure.
|
|
||||||
|
|
||||||
seektime : float :
|
|
||||||
the last position of this block(seconds) already fetched.
|
|
||||||
"""
|
|
||||||
|
|
||||||
__slots__ = ['first','last','end','continuation','chat_data','remaining',
|
|
||||||
'done','is_last','during_split','seektime']
|
|
||||||
|
|
||||||
def __init__(self, first = 0, last = 0, end = 0,
|
|
||||||
continuation = '', chat_data = [], is_last = False,
|
|
||||||
during_split = False, seektime = None):
|
|
||||||
self.first = first
|
|
||||||
self.last = last
|
|
||||||
self.end = end
|
|
||||||
self.continuation = continuation
|
|
||||||
self.chat_data = chat_data
|
|
||||||
self.done = False
|
|
||||||
self.remaining = self.end - self.last
|
|
||||||
self.is_last = is_last
|
|
||||||
self.during_split = during_split
|
|
||||||
self.seektime = seektime
|
|
||||||
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
import re
|
|
||||||
from ... import config
|
|
||||||
from ... exceptions import (
|
|
||||||
ResponseContextError,
|
|
||||||
NoContents, NoContinuation)
|
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def parse(jsn):
|
|
||||||
"""
|
|
||||||
Parse replay chat data.
|
|
||||||
Parameter:
|
|
||||||
----------
|
|
||||||
jsn : dict
|
|
||||||
JSON of replay chat data.
|
|
||||||
Returns:
|
|
||||||
------
|
|
||||||
continuation : str
|
|
||||||
actions : list
|
|
||||||
|
|
||||||
"""
|
|
||||||
if jsn is None:
|
|
||||||
raise ValueError("parameter JSON is None")
|
|
||||||
if jsn['response']['responseContext'].get('errors'):
|
|
||||||
raise ResponseContextError(
|
|
||||||
'video_id is invalid or private/deleted.')
|
|
||||||
contents = jsn["response"].get('continuationContents')
|
|
||||||
if contents is None:
|
|
||||||
raise NoContents('No chat data.')
|
|
||||||
|
|
||||||
cont = contents['liveChatContinuation']['continuations'][0]
|
|
||||||
if cont is None:
|
|
||||||
raise NoContinuation('No Continuation')
|
|
||||||
metadata = cont.get('liveChatReplayContinuationData')
|
|
||||||
if metadata:
|
|
||||||
continuation = metadata.get("continuation")
|
|
||||||
actions = contents['liveChatContinuation'].get('actions')
|
|
||||||
if continuation:
|
|
||||||
return continuation, [action["replayChatItemAction"]["actions"][0]
|
|
||||||
for action in actions
|
|
||||||
if list(action['replayChatItemAction']["actions"][0].values()
|
|
||||||
)[0]['item'].get("liveChatPaidMessageRenderer")
|
|
||||||
or list(action['replayChatItemAction']["actions"][0].values()
|
|
||||||
)[0]['item'].get("liveChatPaidStickerRenderer")
|
|
||||||
]
|
|
||||||
return None, []
|
|
||||||
|
|
||||||
|
|
||||||
def get_offset(item):
|
|
||||||
return int(item['replayChatItemAction']["videoOffsetTimeMsec"])
|
|
||||||
|
|
||||||
|
|
||||||
def get_id(item):
|
|
||||||
return list((list(item['replayChatItemAction']["actions"][0].values()
|
|
||||||
)[0])['item'].values())[0].get('id')
|
|
||||||
|
|
||||||
|
|
||||||
def get_type(item):
|
|
||||||
return list((list(item['replayChatItemAction']["actions"][0].values()
|
|
||||||
)[0])['item'].keys())[0]
|
|
||||||
|
|
||||||
|
|
||||||
_REGEX_YTINIT = re.compile(
|
|
||||||
"window\\[\"ytInitialData\"\\]\\s*=\\s*({.+?});\\s+")
|
|
||||||
|
|
||||||
|
|
||||||
def extract(text):
|
|
||||||
|
|
||||||
match = re.findall(_REGEX_YTINIT, str(text))
|
|
||||||
if match:
|
|
||||||
return match[0]
|
|
||||||
return None
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
from . import parser
|
|
||||||
from . block import Block
|
|
||||||
from typing import NamedTuple
|
|
||||||
|
|
||||||
class Patch(NamedTuple):
|
|
||||||
"""
|
|
||||||
Patch represents chunk of chat data
|
|
||||||
which is fetched by asyncdl.fetch_patch._fetch().
|
|
||||||
"""
|
|
||||||
chats : list = []
|
|
||||||
continuation : str = None
|
|
||||||
seektime : float = None
|
|
||||||
first : int = None
|
|
||||||
last : int = None
|
|
||||||
|
|
||||||
def fill(block:Block, patch:Patch):
|
|
||||||
if patch.last < block.end:
|
|
||||||
set_patch(block, patch)
|
|
||||||
return
|
|
||||||
block.continuation = None
|
|
||||||
|
|
||||||
def set_patch(block:Block, patch:Patch):
|
|
||||||
block.continuation = patch.continuation
|
|
||||||
block.chat_data.extend(patch.chats)
|
|
||||||
block.last = patch.seektime
|
|
||||||
block.seektime = patch.seektime
|
|
||||||
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
from . import asyncdl
|
|
||||||
from . import parser
|
|
||||||
from .. videoinfo import VideoInfo
|
|
||||||
from ... import config
|
|
||||||
from ... exceptions import InvalidVideoIdException
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
headers=config.headers
|
|
||||||
|
|
||||||
class SuperChatMiner:
|
|
||||||
def __init__(self, video_id, duration, div, callback):
|
|
||||||
if not isinstance(div ,int) or div < 1:
|
|
||||||
raise ValueError('div must be positive integer.')
|
|
||||||
elif div > 10:
|
|
||||||
div = 10
|
|
||||||
if not isinstance(duration ,int) or duration < 1:
|
|
||||||
raise ValueError('duration must be positive integer.')
|
|
||||||
self.video_id = video_id
|
|
||||||
self.duration = duration
|
|
||||||
self.div = div
|
|
||||||
self.callback = callback
|
|
||||||
self.blocks = []
|
|
||||||
|
|
||||||
def _ready_blocks(self):
|
|
||||||
blocks = asyncdl.ready_blocks(
|
|
||||||
self.video_id, self.duration, self.div, self.callback)
|
|
||||||
self.blocks = [block for block in blocks if block is not None]
|
|
||||||
return self
|
|
||||||
|
|
||||||
def _set_block_end(self):
|
|
||||||
for i in range(len(self.blocks)-1):
|
|
||||||
self.blocks[i].end = self.blocks[i+1].first
|
|
||||||
self.blocks[-1].end = self.duration
|
|
||||||
self.blocks[-1].is_last =True
|
|
||||||
return self
|
|
||||||
|
|
||||||
def _download_blocks(self):
|
|
||||||
asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
|
|
||||||
return self
|
|
||||||
|
|
||||||
def _combine(self):
|
|
||||||
ret = []
|
|
||||||
for block in self.blocks:
|
|
||||||
ret.extend(block.chat_data)
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def extract(self):
|
|
||||||
return (
|
|
||||||
self._ready_blocks()
|
|
||||||
._set_block_end()
|
|
||||||
._download_blocks()
|
|
||||||
._combine()
|
|
||||||
)
|
|
||||||
|
|
||||||
def extract(video_id, div = 1, callback = None, processor = None):
|
|
||||||
duration = 0
|
|
||||||
try:
|
|
||||||
duration = VideoInfo(video_id).get_duration()
|
|
||||||
except InvalidVideoIdException:
|
|
||||||
raise
|
|
||||||
if duration == 0:
|
|
||||||
print("video is live.")
|
|
||||||
return []
|
|
||||||
data = SuperChatMiner(video_id, duration, div, callback).extract()
|
|
||||||
if processor is None:
|
|
||||||
return data
|
|
||||||
return processor.process(
|
|
||||||
[{'video_id':None,'timeout':1,'chatdata' : (action
|
|
||||||
for action in data)}]
|
|
||||||
)
|
|
||||||
|
|
||||||
def cancel():
|
|
||||||
asyncdl.cancel()
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
from . import parser
|
|
||||||
from . block import Block
|
|
||||||
from . patch import Patch, fill
|
|
||||||
from ... paramgen import arcparam
|
|
||||||
INTERVAL = 1
|
|
||||||
class ExtractWorker:
|
|
||||||
"""
|
|
||||||
ExtractWorker associates a download session with a block.
|
|
||||||
|
|
||||||
When the worker finishes fetching, the block
|
|
||||||
being fetched is splitted and assigned the free worker.
|
|
||||||
|
|
||||||
Parameter
|
|
||||||
----------
|
|
||||||
fetch : func :
|
|
||||||
extract function of asyncdl
|
|
||||||
|
|
||||||
block : Block :
|
|
||||||
Block object that includes chat_data
|
|
||||||
|
|
||||||
blocks : list :
|
|
||||||
List of Block(s)
|
|
||||||
|
|
||||||
video_id : str :
|
|
||||||
|
|
||||||
parent_block : Block :
|
|
||||||
the block from which current block is splitted
|
|
||||||
"""
|
|
||||||
__slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']
|
|
||||||
def __init__(self, fetch, block, blocks, video_id ):
|
|
||||||
self.block:Block = block
|
|
||||||
self.fetch = fetch
|
|
||||||
self.blocks:list = blocks
|
|
||||||
self.video_id:str = video_id
|
|
||||||
self.parent_block:Block = None
|
|
||||||
|
|
||||||
async def run(self, session):
|
|
||||||
while self.block.continuation:
|
|
||||||
patch = await self.fetch(
|
|
||||||
self.block.seektime, session)
|
|
||||||
fill(self.block, patch)
|
|
||||||
self.block.seektime += INTERVAL
|
|
||||||
self.block.done = True
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import requests
|
import httpx
|
||||||
from .. import config
|
from .. import config
|
||||||
from ..exceptions import InvalidVideoIdException
|
from ..exceptions import InvalidVideoIdException, PatternUnmatchError
|
||||||
from ..util.extract_video_id import extract_video_id
|
from ..util.extract_video_id import extract_video_id
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
|
|
||||||
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")
|
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
|
||||||
|
|
||||||
item_channel_id = [
|
item_channel_id = [
|
||||||
"videoDetails",
|
"videoDetails",
|
||||||
@@ -85,13 +85,16 @@ class VideoInfo:
|
|||||||
|
|
||||||
def _get_page_text(self, video_id):
|
def _get_page_text(self, video_id):
|
||||||
url = f"https://www.youtube.com/embed/{video_id}"
|
url = f"https://www.youtube.com/embed/{video_id}"
|
||||||
resp = requests.get(url, headers=headers)
|
resp = httpx.get(url, headers=headers)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.text
|
return resp.text
|
||||||
|
|
||||||
def _parse(self, text):
|
def _parse(self, text):
|
||||||
result = re.search(pattern, text)
|
result = re.search(pattern, text)
|
||||||
res = json.loads(result.group(1))
|
if result is None:
|
||||||
|
raise PatternUnmatchError(text)
|
||||||
|
decoder = json.JSONDecoder()
|
||||||
|
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
||||||
response = self._get_item(res, item_response)
|
response = self._get_item(res, item_response)
|
||||||
if response is None:
|
if response is None:
|
||||||
self._check_video_is_private(res.get("args"))
|
self._check_video_is_private(res.get("args"))
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
import requests
|
import httpx
|
||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
|
|
||||||
def extract(url):
|
def extract(url):
|
||||||
_session = requests.Session()
|
_session = httpx.Client(http2=True)
|
||||||
html = _session.get(url, headers=config.headers)
|
html = _session.get(url, headers=config.headers)
|
||||||
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
||||||
) + 'test.json', mode='w', encoding='utf-8') as f:
|
) + 'test.json', mode='w', encoding='utf-8') as f:
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
aiohttp
|
httpx[http2]==0.14.1
|
||||||
protobuf
|
protobuf==3.13.0
|
||||||
pytz
|
pytz
|
||||||
requests
|
|
||||||
urllib3
|
urllib3
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
aioresponses
|
|
||||||
mock
|
mock
|
||||||
mocker
|
mocker
|
||||||
pytest
|
pytest
|
||||||
pytest-mock
|
pytest_httpx
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
import requests
|
import httpx
|
||||||
import pytchat.config as config
|
import pytchat.config as config
|
||||||
from pytchat.paramgen import arcparam
|
from pytchat.paramgen import arcparam
|
||||||
from pytchat.parser.live import Parser
|
from pytchat.parser.live import Parser
|
||||||
@@ -18,14 +18,15 @@ def test_arcparam_1(mocker):
|
|||||||
def test_arcparam_2(mocker):
|
def test_arcparam_2(mocker):
|
||||||
param = arcparam.getparam("SsjCnHOk-Sk", seektime=100)
|
param = arcparam.getparam("SsjCnHOk-Sk", seektime=100)
|
||||||
url = f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?continuation={param}&pbj=1"
|
url = f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?continuation={param}&pbj=1"
|
||||||
resp = requests.Session().get(url, headers=config.headers)
|
resp = httpx.Client(http2=True).get(url, headers=config.headers)
|
||||||
jsn = json.loads(resp.text)
|
jsn = json.loads(resp.text)
|
||||||
parser = Parser(is_replay=True)
|
parser = Parser(is_replay=True)
|
||||||
contents = parser.get_contents(jsn)
|
contents = parser.get_contents(jsn)
|
||||||
_ , chatdata = parser.parse(contents)
|
_, chatdata = parser.parse(contents)
|
||||||
test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatTextMessageRenderer"]["id"]
|
test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatTextMessageRenderer"]["id"]
|
||||||
assert test_id == "CjoKGkNMYXBzZTdudHVVQ0Zjc0IxZ0FkTnFnQjVREhxDSnlBNHV2bnR1VUNGV0dnd2dvZDd3NE5aZy0w"
|
assert test_id == "CjoKGkNMYXBzZTdudHVVQ0Zjc0IxZ0FkTnFnQjVREhxDSnlBNHV2bnR1VUNGV0dnd2dvZDd3NE5aZy0w"
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_3(mocker):
|
def test_arcparam_3(mocker):
|
||||||
param = arcparam.getparam("01234567890")
|
param = arcparam.getparam("01234567890")
|
||||||
assert param == "op2w0wQmGhxDZzhLRFFvTE1ERXlNelExTmpjNE9UQWdBUT09SARgAXICCAE%3D"
|
assert param == "op2w0wQmGhxDZzhLRFFvTE1ERXlNelExTmpjNE9UQWdBUT09SARgAXICCAE%3D"
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
from pytchat.tool.mining import parser
|
|
||||||
import pytchat.config as config
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
from pytchat.paramgen import arcparam_mining as arcparam
|
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_e(mocker):
|
|
||||||
try:
|
|
||||||
arcparam.getparam("01234567890", -1)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_0(mocker):
|
|
||||||
param = arcparam.getparam("01234567890", 0)
|
|
||||||
|
|
||||||
assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
|
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_1(mocker):
|
|
||||||
param = arcparam.getparam("01234567890", seektime=100000)
|
|
||||||
print(param)
|
|
||||||
assert param == "op2w0wQzGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABWgUQgMLXL2AEcgIIAXgB"
|
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_2(mocker):
|
|
||||||
param = arcparam.getparam("PZz9NB0-Z64", 1)
|
|
||||||
url = f"https://www.youtube.com/live_chat_replay?continuation={param}&playerOffsetMs=1000&pbj=1"
|
|
||||||
resp = requests.Session().get(url, headers=config.headers)
|
|
||||||
jsn = json.loads(resp.text)
|
|
||||||
_, chatdata = parser.parse(jsn[1])
|
|
||||||
test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatPaidMessageRenderer"]["id"]
|
|
||||||
print(test_id)
|
|
||||||
assert test_id == "ChwKGkNKSGE0YnFJeWVBQ0ZWcUF3Z0VkdGIwRm9R"
|
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_3(mocker):
|
|
||||||
param = arcparam.getparam("01234567890")
|
|
||||||
assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
|
|
||||||
@@ -136,7 +136,7 @@ def test_supersticker(mocker):
|
|||||||
assert ret.amountValue == 200
|
assert ret.amountValue == 200
|
||||||
assert ret.amountString == "¥200"
|
assert ret.amountString == "¥200"
|
||||||
assert ret.currency == "JPY"
|
assert ret.currency == "JPY"
|
||||||
assert ret.bgColor == 4278248959
|
assert ret.bgColor == 4278237396
|
||||||
assert ret.sticker == "https://lh3.googleusercontent.com/param_s=s72-rp"
|
assert ret.sticker == "https://lh3.googleusercontent.com/param_s=s72-rp"
|
||||||
assert ret.author.name == "author_name"
|
assert ret.author.name == "author_name"
|
||||||
assert ret.author.channelId == "author_channel_id"
|
assert ret.author.channelId == "author_channel_id"
|
||||||
|
|||||||
@@ -1,77 +0,0 @@
|
|||||||
import aiohttp
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from pytchat.tool.extract import parser
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from aioresponses import aioresponses
|
|
||||||
from concurrent.futures import CancelledError
|
|
||||||
from pytchat.tool.extract import asyncdl
|
|
||||||
|
|
||||||
def _open_file(path):
|
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
|
||||||
return f.read()
|
|
||||||
|
|
||||||
|
|
||||||
def test_asyncdl_split():
|
|
||||||
|
|
||||||
ret = asyncdl._split(0,1000,1)
|
|
||||||
assert ret == [0]
|
|
||||||
|
|
||||||
ret = asyncdl._split(1000,1000,10)
|
|
||||||
assert ret == [1000]
|
|
||||||
|
|
||||||
ret = asyncdl._split(0,1000,5)
|
|
||||||
assert ret == [0,200,400,600,800]
|
|
||||||
|
|
||||||
ret = asyncdl._split(10.5, 700.3, 5)
|
|
||||||
assert ret == [10, 148, 286, 424, 562]
|
|
||||||
|
|
||||||
|
|
||||||
ret = asyncdl._split(0,500,5)
|
|
||||||
assert ret == [0,125,250,375]
|
|
||||||
|
|
||||||
ret = asyncdl._split(0,500,500)
|
|
||||||
assert ret == [0,125,250,375]
|
|
||||||
|
|
||||||
ret = asyncdl._split(-1,1000,5)
|
|
||||||
assert ret == [-1, 199, 399, 599, 799]
|
|
||||||
|
|
||||||
"""invalid argument order"""
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split(500,0,5)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
"""invalid count"""
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split(0,500,-1)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split(0,500,0)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
"""invalid argument type"""
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split(0,5000,5.2)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split(0,5000,"test")
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
|
|
||||||
try:
|
|
||||||
ret = asyncdl._split([0,1],5000,5)
|
|
||||||
assert False
|
|
||||||
except ValueError:
|
|
||||||
assert True
|
|
||||||
@@ -1,60 +1,66 @@
|
|||||||
import aiohttp
|
|
||||||
import asyncio
|
|
||||||
import json
|
import json
|
||||||
import os, sys
|
|
||||||
import time
|
|
||||||
from pytchat.tool.extract import duplcheck
|
from pytchat.tool.extract import duplcheck
|
||||||
from pytchat.tool.extract import parser
|
from pytchat.tool.extract import parser
|
||||||
from pytchat.tool.extract.block import Block
|
from pytchat.tool.extract.block import Block
|
||||||
from pytchat.tool.extract.duplcheck import _dump
|
from pytchat.tool.extract.duplcheck import _dump
|
||||||
def _open_file(path):
|
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
|
||||||
return f.read()
|
|
||||||
|
|
||||||
|
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path, mode='r', encoding='utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
def test_overlap():
|
def test_overlap():
|
||||||
"""
|
"""
|
||||||
test overlap data
|
test overlap data
|
||||||
operation : [0] [2] [3] [4] -> last :align to end
|
operation : [0] [2] [3] [4] -> last :align to end
|
||||||
[1] , [5] -> no change
|
[1] , [5] -> no change
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/extract_duplcheck/overlap/"+filename))
|
json.loads(_open_file(
|
||||||
|
"tests/testdata/extract_duplcheck/overlap/" + filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
blocks = (
|
blocks = (
|
||||||
Block(first = 0, last= 12771, end= 9890,chat_data = load_chatdata("dp0-0.json")),
|
Block(first=0, last=12771, end=9890,
|
||||||
Block(first = 9890, last= 15800, end= 20244,chat_data = load_chatdata("dp0-1.json")),
|
chat_data=load_chatdata("dp0-0.json")),
|
||||||
Block(first = 20244,last= 45146, end= 32476,chat_data = load_chatdata("dp0-2.json")),
|
Block(first=9890, last=15800, end=20244,
|
||||||
Block(first = 32476,last= 50520, end= 41380,chat_data = load_chatdata("dp0-3.json")),
|
chat_data=load_chatdata("dp0-1.json")),
|
||||||
Block(first = 41380,last= 62875, end= 52568,chat_data = load_chatdata("dp0-4.json")),
|
Block(first=20244, last=45146, end=32476,
|
||||||
Block(first = 52568,last= 62875, end= 54000,chat_data = load_chatdata("dp0-5.json"),is_last=True)
|
chat_data=load_chatdata("dp0-2.json")),
|
||||||
|
Block(first=32476, last=50520, end=41380,
|
||||||
|
chat_data=load_chatdata("dp0-3.json")),
|
||||||
|
Block(first=41380, last=62875, end=52568,
|
||||||
|
chat_data=load_chatdata("dp0-4.json")),
|
||||||
|
Block(first=52568, last=62875, end=54000,
|
||||||
|
chat_data=load_chatdata("dp0-5.json"), is_last=True)
|
||||||
)
|
)
|
||||||
result = duplcheck.remove_overlap(blocks)
|
result = duplcheck.remove_overlap(blocks)
|
||||||
#dp0-0.json has item offset time is 9890 (equals block[0].end = block[1].first),
|
# dp0-0.json has item offset time is 9890 (equals block[0].end = block[1].first),
|
||||||
#but must be aligne to the most close and smaller value:9779.
|
# but must be aligne to the most close and smaller value:9779.
|
||||||
assert result[0].last == 9779
|
assert result[0].last == 9779
|
||||||
|
|
||||||
assert result[1].last == 15800
|
assert result[1].last == 15800
|
||||||
|
|
||||||
assert result[2].last == 32196
|
assert result[2].last == 32196
|
||||||
|
|
||||||
assert result[3].last == 41116
|
assert result[3].last == 41116
|
||||||
|
|
||||||
assert result[4].last == 52384
|
assert result[4].last == 52384
|
||||||
|
|
||||||
#the last block must be always added to result.
|
# the last block must be always added to result.
|
||||||
assert result[5].last == 62875
|
assert result[5].last == 62875
|
||||||
|
|
||||||
|
|
||||||
def test_duplicate_head():
|
def test_duplicate_head():
|
||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename))
|
json.loads(_open_file(
|
||||||
|
"tests/testdata/extract_duplcheck/head/" + filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -69,25 +75,26 @@ def test_duplicate_head():
|
|||||||
result : [2] , [4] , [5]
|
result : [2] , [4] , [5]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#chat data offsets are ignored.
|
# chat data offsets are ignored.
|
||||||
blocks = (
|
blocks = (
|
||||||
Block(first = 0, last = 2500, chat_data = load_chatdata("dp0-0.json")),
|
Block(first=0, last=2500, chat_data=load_chatdata("dp0-0.json")),
|
||||||
Block(first = 0, last =38771, chat_data = load_chatdata("dp0-1.json")),
|
Block(first=0, last=38771, chat_data=load_chatdata("dp0-1.json")),
|
||||||
Block(first = 0, last =45146, chat_data = load_chatdata("dp0-2.json")),
|
Block(first=0, last=45146, chat_data=load_chatdata("dp0-2.json")),
|
||||||
Block(first = 20244, last =60520, chat_data = load_chatdata("dp0-3.json")),
|
Block(first=20244, last=60520, chat_data=load_chatdata("dp0-3.json")),
|
||||||
Block(first = 20244, last =62875, chat_data = load_chatdata("dp0-4.json")),
|
Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
|
||||||
Block(first = 52568, last =62875, chat_data = load_chatdata("dp0-5.json"))
|
Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
|
||||||
)
|
)
|
||||||
_dump(blocks)
|
_dump(blocks)
|
||||||
result = duplcheck.remove_duplicate_head(blocks)
|
result = duplcheck.remove_duplicate_head(blocks)
|
||||||
|
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
assert result[0].first == blocks[2].first
|
assert result[0].first == blocks[2].first
|
||||||
assert result[0].last == blocks[2].last
|
assert result[0].last == blocks[2].last
|
||||||
assert result[1].first == blocks[4].first
|
assert result[1].first == blocks[4].first
|
||||||
assert result[1].last == blocks[4].last
|
assert result[1].last == blocks[4].last
|
||||||
assert result[2].first == blocks[5].first
|
assert result[2].first == blocks[5].first
|
||||||
assert result[2].last == blocks[5].last
|
assert result[2].last == blocks[5].last
|
||||||
|
|
||||||
|
|
||||||
def test_duplicate_tail():
|
def test_duplicate_tail():
|
||||||
"""
|
"""
|
||||||
@@ -103,26 +110,25 @@ def test_duplicate_tail():
|
|||||||
"""
|
"""
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename))
|
json.loads(_open_file(
|
||||||
|
"tests/testdata/extract_duplcheck/head/" + filename))
|
||||||
)[1]
|
)[1]
|
||||||
#chat data offsets are ignored.
|
# chat data offsets are ignored.
|
||||||
blocks = (
|
blocks = (
|
||||||
Block(first = 0,last = 2500, chat_data=load_chatdata("dp0-0.json")),
|
Block(first=0, last=2500, chat_data=load_chatdata("dp0-0.json")),
|
||||||
Block(first = 1500,last = 2500, chat_data=load_chatdata("dp0-1.json")),
|
Block(first=1500, last=2500, chat_data=load_chatdata("dp0-1.json")),
|
||||||
Block(first = 10000,last = 45146, chat_data=load_chatdata("dp0-2.json")),
|
Block(first=10000, last=45146, chat_data=load_chatdata("dp0-2.json")),
|
||||||
Block(first = 20244,last = 45146, chat_data=load_chatdata("dp0-3.json")),
|
Block(first=20244, last=45146, chat_data=load_chatdata("dp0-3.json")),
|
||||||
Block(first = 20244,last = 62875, chat_data=load_chatdata("dp0-4.json")),
|
Block(first=20244, last=62875, chat_data=load_chatdata("dp0-4.json")),
|
||||||
Block(first = 52568,last = 62875, chat_data=load_chatdata("dp0-5.json"))
|
Block(first=52568, last=62875, chat_data=load_chatdata("dp0-5.json"))
|
||||||
)
|
)
|
||||||
|
|
||||||
result = duplcheck.remove_duplicate_tail(blocks)
|
result = duplcheck.remove_duplicate_tail(blocks)
|
||||||
_dump(result)
|
_dump(result)
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
assert result[0].first == blocks[0].first
|
assert result[0].first == blocks[0].first
|
||||||
assert result[0].last == blocks[0].last
|
assert result[0].last == blocks[0].last
|
||||||
assert result[1].first == blocks[2].first
|
assert result[1].first == blocks[2].first
|
||||||
assert result[1].last == blocks[2].last
|
assert result[1].last == blocks[2].last
|
||||||
assert result[2].first == blocks[4].first
|
assert result[2].first == blocks[4].first
|
||||||
assert result[2].last == blocks[4].last
|
assert result[2].last == blocks[4].last
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,19 @@
|
|||||||
import aiohttp
|
|
||||||
import asyncio
|
|
||||||
import json
|
import json
|
||||||
import os, sys
|
|
||||||
import time
|
|
||||||
from aioresponses import aioresponses
|
|
||||||
from pytchat.tool.extract import duplcheck
|
|
||||||
from pytchat.tool.extract import parser
|
from pytchat.tool.extract import parser
|
||||||
from pytchat.tool.extract.block import Block
|
from pytchat.tool.extract.block import Block
|
||||||
from pytchat.tool.extract.patch import Patch, fill, split, set_patch
|
from pytchat.tool.extract.patch import Patch, split
|
||||||
from pytchat.tool.extract.duplcheck import _dump
|
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
with open(path, mode='r', encoding='utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
def load_chatdata(filename):
|
def load_chatdata(filename):
|
||||||
return parser.parse(
|
return parser.parse(
|
||||||
json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
|
json.loads(_open_file("tests/testdata/fetch_patch/" + filename))
|
||||||
)[1]
|
)[1]
|
||||||
|
|
||||||
|
|
||||||
def test_split_0():
|
def test_split_0():
|
||||||
@@ -61,20 +57,23 @@ def test_split_0():
|
|||||||
@fetched patch
|
@fetched patch
|
||||||
|-- patch --|
|
|-- patch --|
|
||||||
"""
|
"""
|
||||||
parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
|
parent = Block(first=0, last=4000, end=60000,
|
||||||
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000,
|
||||||
|
continuation='mean', during_split=True)
|
||||||
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
first=32500, last=34000, continuation='patch')
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
split(parent,child,patch)
|
split(parent, child, patch)
|
||||||
|
|
||||||
assert child.continuation == 'patch'
|
assert child.continuation == 'patch'
|
||||||
assert parent.last < child.first
|
assert parent.last < child.first
|
||||||
assert parent.end == child.first
|
assert parent.end == child.first
|
||||||
assert child.first < child.last
|
assert child.first < child.last
|
||||||
assert child.last < child.end
|
assert child.last < child.end
|
||||||
assert parent.during_split == False
|
assert parent.during_split is False
|
||||||
assert child.during_split == False
|
assert child.during_split is False
|
||||||
|
|
||||||
|
|
||||||
def test_split_1():
|
def test_split_1():
|
||||||
"""patch.first <= parent_block.last
|
"""patch.first <= parent_block.last
|
||||||
@@ -119,14 +118,15 @@ def test_split_1():
|
|||||||
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
first=32500, last=34000, continuation='patch')
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
split(parent,child,patch)
|
|
||||||
|
|
||||||
assert parent.last == 33000 #no change
|
split(parent, child, patch)
|
||||||
assert parent.end == 60000 #no change
|
|
||||||
|
assert parent.last == 33000 # no change
|
||||||
|
assert parent.end == 60000 # no change
|
||||||
assert child.continuation is None
|
assert child.continuation is None
|
||||||
assert parent.during_split == False
|
assert parent.during_split is False
|
||||||
assert child.during_split == True #exclude during_split sequence
|
assert child.during_split is True # exclude during_split sequence
|
||||||
|
|
||||||
|
|
||||||
def test_split_2():
|
def test_split_2():
|
||||||
"""child_block.end < patch.last:
|
"""child_block.end < patch.last:
|
||||||
@@ -174,7 +174,7 @@ def test_split_2():
|
|||||||
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
first=32500, last=34000, continuation='patch')
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
split(parent,child,patch)
|
split(parent, child, patch)
|
||||||
|
|
||||||
assert child.continuation is None
|
assert child.continuation is None
|
||||||
assert parent.last < child.first
|
assert parent.last < child.first
|
||||||
@@ -182,8 +182,9 @@ def test_split_2():
|
|||||||
assert child.first < child.last
|
assert child.first < child.last
|
||||||
assert child.last < child.end
|
assert child.last < child.end
|
||||||
assert child.continuation is None
|
assert child.continuation is None
|
||||||
assert parent.during_split == False
|
assert parent.during_split is False
|
||||||
assert child.during_split == False
|
assert child.during_split is False
|
||||||
|
|
||||||
|
|
||||||
def test_split_none():
|
def test_split_none():
|
||||||
"""patch.last <= parent_block.last
|
"""patch.last <= parent_block.last
|
||||||
@@ -193,7 +194,7 @@ def test_split_none():
|
|||||||
and parent.block.last exceeds patch.first.
|
and parent.block.last exceeds patch.first.
|
||||||
|
|
||||||
In this case, fetched patch is all discarded,
|
In this case, fetched patch is all discarded,
|
||||||
and worker searches other processing block again.
|
and worker searches other processing block again.
|
||||||
|
|
||||||
~~~~~~ before ~~~~~~
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
@@ -229,10 +230,10 @@ def test_split_none():
|
|||||||
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
first=32500, last=34000, continuation='patch')
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
split(parent,child,patch)
|
split(parent, child, patch)
|
||||||
|
|
||||||
assert parent.last == 40000 #no change
|
assert parent.last == 40000 # no change
|
||||||
assert parent.end == 60000 #no change
|
assert parent.end == 60000 # no change
|
||||||
assert child.continuation is None
|
assert child.continuation is None
|
||||||
assert parent.during_split == False
|
assert parent.during_split is False
|
||||||
assert child.during_split == True #exclude during_split sequence
|
assert child.during_split is True # exclude during_split sequence
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from aioresponses import aioresponses
|
from pytest_httpx import HTTPXMock
|
||||||
|
from concurrent.futures import CancelledError
|
||||||
|
from pytchat.core_multithread.livechat import LiveChat
|
||||||
from pytchat.core_async.livechat import LiveChatAsync
|
from pytchat.core_async.livechat import LiveChatAsync
|
||||||
from pytchat.exceptions import ResponseContextError
|
from pytchat.exceptions import ResponseContextError
|
||||||
|
|
||||||
@@ -9,34 +12,37 @@ def _open_file(path):
|
|||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
def add_response_file(httpx_mock: HTTPXMock, jsonfile_path: str):
|
||||||
def test_Async(*mock):
|
testdata = json.loads(_open_file(jsonfile_path))
|
||||||
vid = '__test_id__'
|
httpx_mock.add_response(json=testdata)
|
||||||
_text = _open_file('tests/testdata/paramgen_firstread.json')
|
|
||||||
_text = json.loads(_text)
|
|
||||||
mock[0].get(
|
def test_async(httpx_mock: HTTPXMock):
|
||||||
f"https://www.youtube.com/live_chat?v={vid}&is_popout=1", status=200, body=_text)
|
add_response_file(httpx_mock, 'tests/testdata/paramgen_firstread.json')
|
||||||
|
|
||||||
|
async def test_loop():
|
||||||
|
try:
|
||||||
|
chat = LiveChatAsync(video_id='__test_id__')
|
||||||
|
_ = await chat.get()
|
||||||
|
assert chat.is_alive()
|
||||||
|
chat.terminate()
|
||||||
|
assert not chat.is_alive()
|
||||||
|
except ResponseContextError:
|
||||||
|
assert False
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
try:
|
try:
|
||||||
chat = LiveChatAsync(video_id='__test_id__')
|
loop.run_until_complete(test_loop())
|
||||||
|
except CancelledError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
|
||||||
|
def test_multithread(httpx_mock: HTTPXMock):
|
||||||
|
add_response_file(httpx_mock, 'tests/testdata/paramgen_firstread.json')
|
||||||
|
try:
|
||||||
|
chat = LiveChat(video_id='__test_id__')
|
||||||
|
_ = chat.get()
|
||||||
assert chat.is_alive()
|
assert chat.is_alive()
|
||||||
chat.terminate()
|
chat.terminate()
|
||||||
assert not chat.is_alive()
|
assert not chat.is_alive()
|
||||||
except ResponseContextError:
|
except ResponseContextError:
|
||||||
assert not chat.is_alive()
|
assert False
|
||||||
|
|
||||||
|
|
||||||
def test_MultiThread(mocker):
|
|
||||||
_text = _open_file('tests/testdata/paramgen_firstread.json')
|
|
||||||
_text = json.loads(_text)
|
|
||||||
responseMock = mocker.Mock()
|
|
||||||
responseMock.status_code = 200
|
|
||||||
responseMock.text = _text
|
|
||||||
mocker.patch('requests.Session.get').return_value = responseMock
|
|
||||||
try:
|
|
||||||
chat = LiveChatAsync(video_id='__test_id__')
|
|
||||||
assert chat.is_alive()
|
|
||||||
chat.terminate()
|
|
||||||
assert not chat.is_alive()
|
|
||||||
except ResponseContextError:
|
|
||||||
chat.terminate()
|
|
||||||
assert not chat.is_alive()
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import re
|
import json
|
||||||
from aioresponses import aioresponses
|
from pytest_httpx import HTTPXMock
|
||||||
from concurrent.futures import CancelledError
|
from concurrent.futures import CancelledError
|
||||||
from pytchat.core_multithread.livechat import LiveChat
|
from pytchat.core_multithread.livechat import LiveChat
|
||||||
from pytchat.core_async.livechat import LiveChatAsync
|
from pytchat.core_async.livechat import LiveChatAsync
|
||||||
@@ -12,18 +12,18 @@ def _open_file(path):
|
|||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
def add_response_file(httpx_mock: HTTPXMock, jsonfile_path: str):
|
||||||
def test_async_live_stream(*mock):
|
testdata = json.loads(_open_file(jsonfile_path))
|
||||||
|
httpx_mock.add_response(json=testdata)
|
||||||
|
|
||||||
async def test_loop(*mock):
|
|
||||||
pattern = re.compile(
|
def test_async_live_stream(httpx_mock: HTTPXMock):
|
||||||
r'^https://www.youtube.com/live_chat/get_live_chat\?continuation=.*$')
|
add_response_file(httpx_mock, 'tests/testdata/test_stream.json')
|
||||||
_text = _open_file('tests/testdata/test_stream.json')
|
|
||||||
mock[0].get(pattern, status=200, body=_text)
|
async def test_loop():
|
||||||
chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
|
chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
|
||||||
chats = await chat.get()
|
chats = await chat.get()
|
||||||
rawdata = chats[0]["chatdata"]
|
rawdata = chats[0]["chatdata"]
|
||||||
# assert fetching livachat data
|
|
||||||
assert list(rawdata[0]["addChatItemAction"]["item"].keys())[
|
assert list(rawdata[0]["addChatItemAction"]["item"].keys())[
|
||||||
0] == "liveChatTextMessageRenderer"
|
0] == "liveChatTextMessageRenderer"
|
||||||
assert list(rawdata[1]["addChatItemAction"]["item"].keys())[
|
assert list(rawdata[1]["addChatItemAction"]["item"].keys())[
|
||||||
@@ -41,25 +41,16 @@ def test_async_live_stream(*mock):
|
|||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(test_loop(*mock))
|
loop.run_until_complete(test_loop())
|
||||||
except CancelledError:
|
except CancelledError:
|
||||||
assert True
|
assert True
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
def test_async_replay_stream(httpx_mock: HTTPXMock):
|
||||||
def test_async_replay_stream(*mock):
|
add_response_file(httpx_mock, 'tests/testdata/finished_live.json')
|
||||||
|
add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')
|
||||||
async def test_loop(*mock):
|
|
||||||
pattern_live = re.compile(
|
|
||||||
r'^https://www.youtube.com/live_chat/get_live_chat\?continuation=.*$')
|
|
||||||
pattern_replay = re.compile(
|
|
||||||
r'^https://www.youtube.com/live_chat_replay/get_live_chat_replay\?continuation=.*$')
|
|
||||||
# empty livechat -> switch to fetch replaychat
|
|
||||||
_text_live = _open_file('tests/testdata/finished_live.json')
|
|
||||||
_text_replay = _open_file('tests/testdata/chatreplay.json')
|
|
||||||
mock[0].get(pattern_live, status=200, body=_text_live)
|
|
||||||
mock[0].get(pattern_replay, status=200, body=_text_replay)
|
|
||||||
|
|
||||||
|
async def test_loop():
|
||||||
chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
|
chat = LiveChatAsync(video_id='__test_id__', processor=DummyProcessor())
|
||||||
chats = await chat.get()
|
chats = await chat.get()
|
||||||
rawdata = chats[0]["chatdata"]
|
rawdata = chats[0]["chatdata"]
|
||||||
@@ -71,27 +62,16 @@ def test_async_replay_stream(*mock):
|
|||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(test_loop(*mock))
|
loop.run_until_complete(test_loop())
|
||||||
except CancelledError:
|
except CancelledError:
|
||||||
assert True
|
assert True
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
def test_async_force_replay(httpx_mock: HTTPXMock):
|
||||||
def test_async_force_replay(*mock):
|
add_response_file(httpx_mock, 'tests/testdata/test_stream.json')
|
||||||
|
add_response_file(httpx_mock, 'tests/testdata/chatreplay.json')
|
||||||
|
|
||||||
async def test_loop(*mock):
|
async def test_loop():
|
||||||
pattern_live = re.compile(
|
|
||||||
r'^https://www.youtube.com/live_chat/get_live_chat\?continuation=.*$')
|
|
||||||
pattern_replay = re.compile(
|
|
||||||
r'^https://www.youtube.com/live_chat_replay/get_live_chat_replay\?continuation=.*$')
|
|
||||||
# valid live data, but force_replay = True
|
|
||||||
_text_live = _open_file('tests/testdata/test_stream.json')
|
|
||||||
# valid replay data
|
|
||||||
_text_replay = _open_file('tests/testdata/chatreplay.json')
|
|
||||||
|
|
||||||
mock[0].get(pattern_live, status=200, body=_text_live)
|
|
||||||
mock[0].get(pattern_replay, status=200, body=_text_replay)
|
|
||||||
# force replay
|
|
||||||
chat = LiveChatAsync(
|
chat = LiveChatAsync(
|
||||||
video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
|
video_id='__test_id__', processor=DummyProcessor(), force_replay=True)
|
||||||
chats = await chat.get()
|
chats = await chat.get()
|
||||||
@@ -105,20 +85,13 @@ def test_async_force_replay(*mock):
|
|||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(test_loop(*mock))
|
loop.run_until_complete(test_loop())
|
||||||
except CancelledError:
|
except CancelledError:
|
||||||
assert True
|
assert True
|
||||||
|
|
||||||
|
|
||||||
def test_multithread_live_stream(mocker):
|
def test_multithread_live_stream(httpx_mock: HTTPXMock):
|
||||||
|
add_response_file(httpx_mock, 'tests/testdata/test_stream.json')
|
||||||
_text = _open_file('tests/testdata/test_stream.json')
|
|
||||||
responseMock = mocker.Mock()
|
|
||||||
responseMock.status_code = 200
|
|
||||||
responseMock.text = _text
|
|
||||||
mocker.patch(
|
|
||||||
'requests.Session.get').return_value.__enter__.return_value = responseMock
|
|
||||||
|
|
||||||
chat = LiveChat(video_id='__test_id__', processor=DummyProcessor())
|
chat = LiveChat(video_id='__test_id__', processor=DummyProcessor())
|
||||||
chats = chat.get()
|
chats = chat.get()
|
||||||
rawdata = chats[0]["chatdata"]
|
rawdata = chats[0]["chatdata"]
|
||||||
|
|||||||
@@ -1,21 +1,18 @@
|
|||||||
from pytchat.parser.live import Parser
|
from pytchat.parser.live import Parser
|
||||||
import json
|
import json
|
||||||
from aioresponses import aioresponses
|
|
||||||
from pytchat.exceptions import NoContents
|
from pytchat.exceptions import NoContents
|
||||||
|
|
||||||
|
|
||||||
|
parser = Parser(is_replay=False)
|
||||||
|
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path, mode='r', encoding='utf-8') as f:
|
with open(path, mode='r', encoding='utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
parser = Parser(is_replay=False)
|
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
|
||||||
def test_finishedlive(*mock):
|
def test_finishedlive(*mock):
|
||||||
'''配信が終了した動画を正しく処理できるか'''
|
'''配信が終了した動画を正しく処理できるか'''
|
||||||
|
|
||||||
_text = _open_file('tests/testdata/finished_live.json')
|
_text = _open_file('tests/testdata/finished_live.json')
|
||||||
_text = json.loads(_text)
|
_text = json.loads(_text)
|
||||||
|
|
||||||
@@ -26,10 +23,8 @@ def test_finishedlive(*mock):
|
|||||||
assert True
|
assert True
|
||||||
|
|
||||||
|
|
||||||
@aioresponses()
|
|
||||||
def test_parsejson(*mock):
|
def test_parsejson(*mock):
|
||||||
'''jsonを正常にパースできるか'''
|
'''jsonを正常にパースできるか'''
|
||||||
|
|
||||||
_text = _open_file('tests/testdata/paramgen_firstread.json')
|
_text = _open_file('tests/testdata/paramgen_firstread.json')
|
||||||
_text = json.loads(_text)
|
_text = json.loads(_text)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
|
from json.decoder import JSONDecodeError
|
||||||
from pytchat.tool.videoinfo import VideoInfo
|
from pytchat.tool.videoinfo import VideoInfo
|
||||||
from pytchat.exceptions import InvalidVideoIdException
|
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
|
||||||
|
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
@@ -12,13 +13,13 @@ def _set_test_data(filepath, mocker):
|
|||||||
response_mock = mocker.Mock()
|
response_mock = mocker.Mock()
|
||||||
response_mock.status_code = 200
|
response_mock.status_code = 200
|
||||||
response_mock.text = _text
|
response_mock.text = _text
|
||||||
mocker.patch('requests.get').return_value = response_mock
|
mocker.patch('httpx.get').return_value = response_mock
|
||||||
|
|
||||||
|
|
||||||
def test_archived_page(mocker):
|
def test_archived_page(mocker):
|
||||||
_set_test_data('tests/testdata/videoinfo/archived_page.txt', mocker)
|
_set_test_data('tests/testdata/videoinfo/archived_page.txt', mocker)
|
||||||
info = VideoInfo('__test_id__')
|
info = VideoInfo('__test_id__')
|
||||||
actual_thumbnail_url = 'https://i.ytimg.com/vi/fzI9FNjXQ0o/hqdefault.jpg'
|
actual_thumbnail_url = 'https://i.ytimg.com/vi/fzI9FNjXQ0o/hqdefault.jpg'
|
||||||
assert info.video_id == '__test_id__'
|
assert info.video_id == '__test_id__'
|
||||||
assert info.get_channel_name() == 'GitHub'
|
assert info.get_channel_name() == 'GitHub'
|
||||||
assert info.get_thumbnail() == actual_thumbnail_url
|
assert info.get_thumbnail() == actual_thumbnail_url
|
||||||
@@ -30,7 +31,7 @@ def test_archived_page(mocker):
|
|||||||
def test_live_page(mocker):
|
def test_live_page(mocker):
|
||||||
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
|
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
|
||||||
info = VideoInfo('__test_id__')
|
info = VideoInfo('__test_id__')
|
||||||
'''live page :duration = 0'''
|
'''live page: duration==0'''
|
||||||
assert info.get_duration() == 0
|
assert info.get_duration() == 0
|
||||||
assert info.video_id == '__test_id__'
|
assert info.video_id == '__test_id__'
|
||||||
assert info.get_channel_name() == 'BGM channel'
|
assert info.get_channel_name() == 'BGM channel'
|
||||||
@@ -64,3 +65,37 @@ def test_no_info(mocker):
|
|||||||
assert info.get_title() is None
|
assert info.get_title() is None
|
||||||
assert info.get_channel_id() is None
|
assert info.get_channel_id() is None
|
||||||
assert info.get_duration() is None
|
assert info.get_duration() is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_collapsed_data(mocker):
|
||||||
|
'''Test case the video page's info is collapsed.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/collapsed_page.txt', mocker)
|
||||||
|
try:
|
||||||
|
_ = VideoInfo('__test_id__')
|
||||||
|
assert False
|
||||||
|
except JSONDecodeError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
|
||||||
|
def test_pattern_unmatch(mocker):
|
||||||
|
'''Test case the pattern for extraction is unmatched.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/pattern_unmatch.txt', mocker)
|
||||||
|
try:
|
||||||
|
_ = VideoInfo('__test_id__')
|
||||||
|
assert False
|
||||||
|
except PatternUnmatchError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
|
||||||
|
def test_extradata_handling(mocker):
|
||||||
|
'''Test case the extracted data are JSON lines.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/extradata_page.txt', mocker)
|
||||||
|
try:
|
||||||
|
_ = VideoInfo('__test_id__')
|
||||||
|
assert True
|
||||||
|
except JSONDecodeError as e:
|
||||||
|
print(e.doc)
|
||||||
|
assert False
|
||||||
|
|||||||
15
tests/testdata/videoinfo/collapsed_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/collapsed_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
15
tests/testdata/videoinfo/extradata_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/extradata_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
15
tests/testdata/videoinfo/pattern_unmatch.txt
vendored
Normal file
15
tests/testdata/videoinfo/pattern_unmatch.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user