Compare commits

...

30 Commits

Author SHA1 Message Date
taizan-hokuto
d3f1643a40 Merge branch 'release/v0.2.1' 2020-09-09 22:23:01 +09:00
taizan-hokuto
eb29f27493 Increment version 2020-09-09 22:22:31 +09:00
taizan-hokuto
8adf75ab83 Merge branch 'feature/pbar' into develop 2020-09-09 22:20:36 +09:00
taizan-hokuto
2e05803d75 Remove unnecessary option 2020-09-09 22:20:09 +09:00
taizan-hokuto
f16c0ee73a Fix progress bar line feed and remove pbar option 2020-09-09 22:19:10 +09:00
taizan-hokuto
a338f2b782 Merge tag 'v0.2.0' into develop
v0.2.0
2020-09-07 23:35:45 +09:00
taizan-hokuto
864ccddfd7 Merge branch 'release/v0.2.0' 2020-09-07 23:35:44 +09:00
taizan-hokuto
339df69e36 Increment version 2020-09-07 23:35:14 +09:00
taizan-hokuto
76a5b0cd18 Merge branch 'feature/new_item' into develop 2020-09-07 23:34:16 +09:00
taizan-hokuto
be0ab2431b Delete test for unuse module 2020-09-07 23:33:26 +09:00
taizan-hokuto
2edb60c592 Delete unuse modules 2020-09-07 23:31:32 +09:00
taizan-hokuto
2c6c3a1ca3 Delete old progress bar 2020-09-07 23:30:49 +09:00
taizan-hokuto
4be540793d Delete unnecessary blank lines 2020-09-07 23:30:30 +09:00
taizan-hokuto
08b86fe596 Make it possible to switch progress bar 2020-09-07 23:29:48 +09:00
taizan-hokuto
157f3b9952 Fix handling when missing id and type 2020-09-07 23:28:03 +09:00
taizan-hokuto
8f3ca2662a Merge tag 'pbar' into develop
v0.1.9
2020-09-06 18:58:34 +09:00
taizan-hokuto
c4b015861c Merge branch 'hotfix/pbar' 2020-09-06 18:58:33 +09:00
taizan-hokuto
3aa413d59e Increment version 2020-09-06 18:54:10 +09:00
taizan-hokuto
03ba285a16 Fix callback handling 2020-09-06 18:53:35 +09:00
taizan-hokuto
5fe0ee5aa8 Merge tag 'v0.1.8' into develop
v0.1.8
2020-09-06 18:27:58 +09:00
taizan-hokuto
4e829a25d4 Merge branch 'release/v0.1.8' 2020-09-06 18:27:57 +09:00
taizan-hokuto
15132a9bb8 Increment version 2020-09-06 18:27:08 +09:00
taizan-hokuto
64ace9dad6 Update progress bar 2020-09-06 18:25:16 +09:00
taizan-hokuto
9a2e96d3a0 Merge tag 'extract_vid' into develop
v0.1.7
2020-09-04 01:55:42 +09:00
taizan-hokuto
a3695a59b8 Merge branch 'hotfix/extract_vid' 2020-09-04 01:55:41 +09:00
taizan-hokuto
bc8655ed62 Increment version 2020-09-04 01:53:14 +09:00
taizan-hokuto
3bdc465740 Devide exception handling 2020-09-04 01:52:53 +09:00
taizan-hokuto
235d6b7212 Fix extract video info 2020-09-04 01:46:10 +09:00
taizan-hokuto
9f0754da57 Merge tag 'http2' into develop
v0.1.6
2020-09-03 21:27:48 +09:00
taizan-hokuto
0e301f48a8 Merge tag 'v0.1.5' into develop
v0.1.5
2020-09-03 20:16:56 +09:00
17 changed files with 109 additions and 511 deletions

View File

@@ -2,7 +2,7 @@
 pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
 """
 __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
-__version__ = '0.1.6'
+__version__ = '0.2.1'
 __license__ = 'MIT'
 __author__ = 'taizan-hokuto'
 __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -32,11 +32,12 @@ def main():
                         'If ID starts with a hyphen (-), enclose the ID in square brackets.')
     parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
                         help='Output directory (end with "/"). default="./"', default='./')
-    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
-                        help='Show version')
     parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
                         help='Save error data when error occurs(".dat" file)')
+    parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
+                        help='Show version')
     Arguments(parser.parse_args().__dict__)
     if Arguments().print_version:
         print(f'pytchat v{__version__} © 2019 taizan-hokuto')
         return
@@ -45,7 +46,7 @@ def main():
     if not Arguments().video_ids:
         parser.print_help()
         return
-    for video_id in Arguments().video_ids:
+    for counter, video_id in enumerate(Arguments().video_ids):
         if '[' in video_id:
             video_id = video_id.replace('[', '').replace(']', '')
         try:
@@ -55,30 +56,42 @@ def main():
             else:
                 raise FileNotFoundError
             info = VideoInfo(video_id)
-            print(f"Extracting...\n"
+            if len(Arguments().video_ids) > 1:
+                print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
+            print(f"\n"
                   f" video_id: {video_id}\n"
                   f" channel: {info.get_channel_name()}\n"
                   f" title: {info.get_title()}")
             print(f" output path: {path.resolve()}")
             duration = info.get_duration()
-            pbar = ProgressBar(duration)
+            pbar = ProgressBar(total=(duration * 1000), status="Extracting")
             ex = Extractor(video_id,
-                           processor=HTMLArchiver(Arguments().output + video_id + '.html'),
                            callback=pbar._disp,
                            div=10)
             signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
-            ex.extract()
+            data = ex.extract()
+            if data == []:
+                return False
+            pbar.reset("#", "=", total=len(data), status="Rendering ")
+            processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
+            processor.process(
+                [{'video_id': None,
+                  'timeout': 1,
+                  'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
+            )
+            processor.finalize()
+            pbar.reset('#', '#', status='Completed ')
             pbar.close()
+            print()
             if pbar.is_cancelled():
                 print("\nThe extraction process has been discontinued.\n")
+                return
+            print("\nThe extraction process has been completed.\n")
         except InvalidVideoIdException:
             print("Invalid Video ID or URL:", video_id)
-        except (TypeError, NoContents) as e:
-            print(e.with_traceback())
+        except NoContents as e:
+            print(e)
         except FileNotFoundError:
             print("The specified directory does not exist.:{}".format(Arguments().output))
         except JSONDecodeError as e:
@@ -95,6 +108,6 @@ def main():
         return


-def cancel(ex: Extractor, pbar: ProgressBar):
+def cancel(ex, pbar):
     ex.cancel()
     pbar.cancel()
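
One detail worth noting in the hunk above: chatdata is handed to HTMLArchiver as a generator expression over the extracted actions, so the archiver consumes them lazily rather than materializing a second list. A toy equivalent of that shape (data values hypothetical):

    data = [{'replayChatItemAction': {'actions': [{'n': i}]}} for i in range(3)]
    component = {'video_id': None, 'timeout': 1,
                 'chatdata': (a['replayChatItemAction']['actions'][0] for a in data)}
    print(list(component['chatdata']))   # [{'n': 0}, {'n': 1}, {'n': 2}]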

View File

@@ -36,6 +36,7 @@ class Arguments(metaclass=Singleton):
         self.output: str = arguments[Arguments.Name.OUTPUT]
         self.video_ids: List[int] = []
         self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
+
         # Videos
         if arguments[Arguments.Name.VIDEO_IDS]:
             self.video_ids = [video_id

View File

@@ -4,35 +4,50 @@ vladignatyev/progress.py
 https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
 (MIT License)
 '''
+import shutil
 import sys


 class ProgressBar:
-    def __init__(self, duration):
-        self._duration = duration
-        self._count = 0
+    def __init__(self, total, status):
         self._bar_len = 60
         self._cancelled = False
+        self.reset(total=total, status=status)
+        self._blinker = 0
+
+    def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
+        self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
+        self._symbol_done = symbol_done
+        self._symbol_space = symbol_space
+        self._total = total
+        self._status = status
+        self._count = 0

     def _disp(self, _, fetched):
-        self._progress(fetched / 1000, self._duration)
+        self._progress(fetched, self._total)

-    def _progress(self, fillin, total, status=''):
+    def _progress(self, fillin, total):
         if total == 0 or self._cancelled:
             return
         self._count += fillin
         filled_len = int(round(self._bar_len * self._count / float(total)))
         percents = round(100.0 * self._count / float(total), 1)
+        if percents > 100:
+            percents = 100.0
         if filled_len > self._bar_len:
             filled_len = self._bar_len
-            percents = 100
-        bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
-        sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
+        bar = self._symbol_done * filled_len + \
+            self._symbol_space * (self._bar_len - filled_len)
+        disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
+        sys.stdout.write(disp)
         sys.stdout.flush()
+        self._blinker += 1

     def close(self):
         if not self._cancelled:
-            self._progress(self._duration, self._duration)
+            self._progress(self._total, self._total)

     def cancel(self):
         self._cancelled = True
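
A minimal usage sketch of the reworked class (with ProgressBar as defined above; the totals are illustrative): construct it with a total and a status label, feed increments through _disp, then reset to reuse the same bar for a second phase, mirroring what the CLI now does.

    pbar = ProgressBar(total=1000, status="Extracting")
    for _ in range(10):
        pbar._disp(None, 100)          # callback signature: (payload, increment)
    pbar.reset("#", "=", total=10, status="Rendering ")
    for _ in range(10):
        pbar._disp(None, 1)
    pbar.close()                       # fills the bar unless cancelled
    print()                            # move off the \r-terminated bar line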

View File

@@ -43,20 +43,21 @@ class HTMLArchiver(ChatProcessor):
     '''
     HTMLArchiver saves chat data as HTML table format.
     '''
-    def __init__(self, save_path):
+    def __init__(self, save_path, callback=None):
         super().__init__()
         self.save_path = self._checkpath(save_path)
         self.processor = DefaultProcessor()
         self.emoji_table = {}  # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
         self.header = [HEADER_HTML]
         self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
+        self.callback = callback

     def _checkpath(self, filepath):
         splitter = os.path.splitext(os.path.basename(filepath))
         body = splitter[0]
         extention = splitter[1]
         newpath = filepath
-        counter = 0
+        counter = 1
         while os.path.exists(newpath):
             match = re.search(PATTERN, body)
             if match:
@@ -80,17 +81,20 @@ class HTMLArchiver(ChatProcessor):
         """
         if chat_components is None or len(chat_components) == 0:
             return
-        self.body.extend(
-            (self._parse_html_line((
-                c.datetime,
-                c.elapsedTime,
-                c.author.name,
-                self._parse_message(c.messageEx),
-                c.amountString,
-                c.author.type,
-                c.author.channelId)
-            ) for c in self.processor.process(chat_components).items)
-        )
+        for c in self.processor.process(chat_components).items:
+            self.body.extend(
+                self._parse_html_line((
+                    c.datetime,
+                    c.elapsedTime,
+                    c.author.name,
+                    self._parse_message(c.messageEx),
+                    c.amountString,
+                    c.author.type,
+                    c.author.channelId)
+                )
+            )
+        if self.callback:
+            self.callback(None, 1)

     def _parse_html_line(self, raw_line):
         return ''.join(('<tr>',
@@ -131,7 +135,7 @@ class HTMLArchiver(ChatProcessor):
     def finalize(self):
         self.header.extend([self._create_styles(), '</head>\n'])
-        self.body.extend(['</table>\n</body>'])
+        self.body.extend(['</table>\n</body>\n</html>'])
         with open(self.save_path, mode='a', encoding='utf-8') as f:
             f.writelines(self.header)
             f.writelines(self.body)
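
With the new hook, rendering progress is reported once per process() call (the archiver fires callback(None, 1) after each batch). A sketch wiring it to the progress bar, where components is a hypothetical list of chat components and the output path is illustrative:

    pbar = ProgressBar(total=len(components), status="Rendering ")
    archiver = HTMLArchiver('./chat.html', callback=pbar._disp)
    for component in components:
        archiver.process([component])   # one callback tick per call
    archiver.finalize()                 # appends the closing </table></body></html>
    pbar.close()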

View File

@@ -42,10 +42,14 @@ def get_offset(item):
 def get_id(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].values())[0].get('id')
+    a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
+    if a:
+        return list(a.values())[0].get('id')
+    return None


 def get_type(item):
-    return list((list(item['replayChatItemAction']["actions"][0].values()
-                      )[0])['item'].keys())[0]
+    a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
+    if a:
+        return list(a.keys())[0]
+    return None
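
This matches the commit "Fix handling when missing id and type": some replay actions carry no nested 'item' payload, and the old chained indexing raised on them, while the guarded version returns None. A minimal illustration (action shape hypothetical but structurally faithful):

    action = {'replayChatItemAction': {'videoOffsetTimeMsec': '0', 'actions': [
        {'markChatItemAsDeletedAction': {}}   # no 'item' key inside
    ]}}
    assert get_id(action) is None             # previously raised instead
    assert get_type(action) is None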

View File

@@ -1,146 +0,0 @@
import httpx
import asyncio
import json
from . import parser
from . block import Block
from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam_mining as arcparam
from concurrent.futures import CancelledError
from urllib.parse import quote

headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay?continuation="
INTERVAL = 1


def _split(start, end, count, min_interval_sec=120):
    """
    Split section from `start` to `end` into `count` pieces,
    and returns the beginning of each piece.
    The `count` is adjusted so that the length of each piece
    is no smaller than `min_interval`.

    Returns:
    --------
        List of the offset of each block's first chat data.
    """
    if not (isinstance(start, int) or isinstance(start, float)) or \
            not (isinstance(end, int) or isinstance(end, float)):
        raise ValueError("start/end must be int or float")
    if not isinstance(count, int):
        raise ValueError("count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")
    if (end - start) / count < min_interval_sec:
        count = int((end - start) / min_interval_sec)
        if count == 0:
            count = 1
    interval = (end - start) / count
    if count == 1:
        return [start]
    return sorted(list(set([int(start + interval * j)
                            for j in range(count)])))
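# Illustrative examples (not in the original file): the count is shrunk so
# each piece spans at least min_interval_sec.
#   _split(0, 3600, 10) -> [0, 360, 720, ..., 3240]  (360s >= 120s, count kept)
#   _split(0, 600, 10)  -> [0, 120, 240, 360, 480]   (600/10 < 120, count -> 5)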
def ready_blocks(video_id, duration, div, callback):
    if div <= 0:
        raise ValueError

    async def _get_blocks(video_id, duration, div, callback):
        async with httpx.ClientSession() as session:
            tasks = [_create_block(session, video_id, seektime, callback)
                     for seektime in _split(0, duration, div)]
            return await asyncio.gather(*tasks)

    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        async with session.get(url, headers=headers) as resp:
            chat_json = await resp.text()
        if chat_json is None:
            return
        continuation, actions = parser.parse(json.loads(chat_json)[1])
        first = seektime
        seektime += INTERVAL
        if callback:
            callback(actions, INTERVAL)
        return Block(
            continuation=continuation,
            chat_data=actions,
            first=first,
            last=seektime,
            seektime=seektime
        )

    """
    fetch initial blocks.
    """
    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
    return blocks


def fetch_patch(callback, blocks, video_id):

    async def _allocate_workers():
        workers = [
            ExtractWorker(
                fetch=_fetch, block=block,
                blocks=blocks, video_id=video_id
            )
            for block in blocks
        ]
        async with httpx.ClientSession() as session:
            tasks = [worker.run(session) for worker in workers]
            return await asyncio.gather(*tasks)

    async def _fetch(seektime, session) -> Patch:
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        async with session.get(url, headers=config.headers) as resp:
            chat_json = await resp.text()
        actions = []
        try:
            if chat_json is None:
                return Patch()
            continuation, actions = parser.parse(json.loads(chat_json)[1])
        except json.JSONDecodeError:
            pass
        if callback:
            callback(actions, INTERVAL)
        return Patch(chats=actions, continuation=continuation,
                     seektime=seektime, last=seektime)

    """
    allocate workers and assign blocks.
    """
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(_allocate_workers())
    except CancelledError:
        pass


async def _shutdown():
    print("\nshutdown...")
    tasks = [t for t in asyncio.all_tasks()
             if t is not asyncio.current_task()]
    for task in tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass


def cancel():
    loop = asyncio.get_event_loop()
    loop.create_task(_shutdown())

View File

@@ -1,62 +0,0 @@
from . import parser


class Block:
    """Block object represents something like a box
    to join chunk of chatdata.

    Parameter:
    ---------
    first : int :
        videoOffsetTimeMs of the first chat_data
        (chat_data[0])

    last : int :
        videoOffsetTimeMs of the last chat_data.
        (chat_data[-1])
        this value increases as fetching chatdata progresses.

    end : int :
        target videoOffsetTimeMs of last chat data for extract,
        equals to first videoOffsetTimeMs of next block.
        when extract worker reaches this offset, stop fetching.

    continuation : str :
        continuation param of last chat data.

    chat_data : list

    done : bool :
        whether this block has been fetched.

    remaining : int :
        remaining data to extract.
        equals end - last.

    is_last : bool :
        whether this block is the last one in blocklist.

    during_split : bool :
        whether this block is in the process of during_split.
        while True, this block is excluded from duplicate split procedure.

    seektime : float :
        the last position of this block(seconds) already fetched.
    """
    __slots__ = ['first', 'last', 'end', 'continuation', 'chat_data', 'remaining',
                 'done', 'is_last', 'during_split', 'seektime']

    def __init__(self, first=0, last=0, end=0,
                 continuation='', chat_data=[], is_last=False,
                 during_split=False, seektime=None):
        self.first = first
        self.last = last
        self.end = end
        self.continuation = continuation
        self.chat_data = chat_data
        self.done = False
        self.remaining = self.end - self.last
        self.is_last = is_last
        self.during_split = during_split
        self.seektime = seektime

View File

@@ -1,73 +0,0 @@
import re
from ... import config
from ... exceptions import (
    ResponseContextError,
    NoContents, NoContinuation)

logger = config.logger(__name__)


def parse(jsn):
    """
    Parse replay chat data.
    Parameter:
    ----------
    jsn : dict
        JSON of replay chat data.
    Returns:
    ------
    continuation : str
    actions : list
    """
    if jsn is None:
        raise ValueError("parameter JSON is None")
    if jsn['response']['responseContext'].get('errors'):
        raise ResponseContextError(
            'video_id is invalid or private/deleted.')
    contents = jsn["response"].get('continuationContents')
    if contents is None:
        raise NoContents('No chat data.')

    cont = contents['liveChatContinuation']['continuations'][0]
    if cont is None:
        raise NoContinuation('No Continuation')
    metadata = cont.get('liveChatReplayContinuationData')
    if metadata:
        continuation = metadata.get("continuation")
        actions = contents['liveChatContinuation'].get('actions')
        if continuation:
            return continuation, [action["replayChatItemAction"]["actions"][0]
                                  for action in actions
                                  if list(action['replayChatItemAction']["actions"][0].values()
                                          )[0]['item'].get("liveChatPaidMessageRenderer")
                                  or list(action['replayChatItemAction']["actions"][0].values()
                                          )[0]['item'].get("liveChatPaidStickerRenderer")
                                  ]
    return None, []


def get_offset(item):
    return int(item['replayChatItemAction']["videoOffsetTimeMsec"])


def get_id(item):
    return list((list(item['replayChatItemAction']["actions"][0].values()
                      )[0])['item'].values())[0].get('id')


def get_type(item):
    return list((list(item['replayChatItemAction']["actions"][0].values()
                      )[0])['item'].keys())[0]


_REGEX_YTINIT = re.compile(
    "window\\[\"ytInitialData\"\\]\\s*=\\s*({.+?});\\s+")


def extract(text):
    match = re.findall(_REGEX_YTINIT, str(text))
    if match:
        return match[0]
    return None

View File

@@ -1,27 +0,0 @@
from . import parser
from . block import Block
from typing import NamedTuple


class Patch(NamedTuple):
    """
    Patch represents chunk of chat data
    which is fetched by asyncdl.fetch_patch._fetch().
    """
    chats: list = []
    continuation: str = None
    seektime: float = None
    first: int = None
    last: int = None


def fill(block: Block, patch: Patch):
    if patch.last < block.end:
        set_patch(block, patch)
        return
    block.continuation = None


def set_patch(block: Block, patch: Patch):
    block.continuation = patch.continuation
    block.chat_data.extend(patch.chats)
    block.last = patch.seektime
    block.seektime = patch.seektime

View File

@@ -1,72 +0,0 @@
from . import asyncdl
from . import parser
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException

logger = config.logger(__name__)
headers = config.headers


class SuperChatMiner:
    def __init__(self, video_id, duration, div, callback):
        if not isinstance(div, int) or div < 1:
            raise ValueError('div must be positive integer.')
        elif div > 10:
            div = 10
        if not isinstance(duration, int) or duration < 1:
            raise ValueError('duration must be positive integer.')
        self.video_id = video_id
        self.duration = duration
        self.div = div
        self.callback = callback
        self.blocks = []

    def _ready_blocks(self):
        blocks = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
        self.blocks = [block for block in blocks if block is not None]
        return self

    def _set_block_end(self):
        for i in range(len(self.blocks) - 1):
            self.blocks[i].end = self.blocks[i + 1].first
        self.blocks[-1].end = self.duration
        self.blocks[-1].is_last = True
        return self

    def _download_blocks(self):
        asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
        return self

    def _combine(self):
        ret = []
        for block in self.blocks:
            ret.extend(block.chat_data)
        return ret

    def extract(self):
        return (
            self._ready_blocks()
                ._set_block_end()
                ._download_blocks()
                ._combine()
        )


def extract(video_id, div=1, callback=None, processor=None):
    duration = 0
    try:
        duration = VideoInfo(video_id).get_duration()
    except InvalidVideoIdException:
        raise
    if duration == 0:
        print("video is live.")
        return []
    data = SuperChatMiner(video_id, duration, div, callback).extract()
    if processor is None:
        return data
    return processor.process(
        [{'video_id': None, 'timeout': 1, 'chatdata': (action
          for action in data)}]
    )


def cancel():
    asyncdl.cancel()

View File

@@ -1,45 +0,0 @@
from . import parser
from . block import Block
from . patch import Patch, fill
from ... paramgen import arcparam

INTERVAL = 1


class ExtractWorker:
    """
    ExtractWorker associates a download session with a block.

    When the worker finishes fetching, the block
    being fetched is splitted and assigned the free worker.

    Parameter
    ----------
    fetch : func :
        extract function of asyncdl

    block : Block :
        Block object that includes chat_data

    blocks : list :
        List of Block(s)

    video_id : str :

    parent_block : Block :
        the block from which current block is splitted
    """
    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']

    def __init__(self, fetch, block, blocks, video_id):
        self.block: Block = block
        self.fetch = fetch
        self.blocks: list = blocks
        self.video_id: str = video_id
        self.parent_block: Block = None

    async def run(self, session):
        while self.block.continuation:
            patch = await self.fetch(
                self.block.seektime, session)
            fill(self.block, patch)
            self.block.seektime += INTERVAL
        self.block.done = True

View File

@@ -93,7 +93,8 @@ class VideoInfo:
         result = re.search(pattern, text)
         if result is None:
             raise PatternUnmatchError(text)
-        res = json.loads(result.group(1)[:-1])
+        decoder = json.JSONDecoder()
+        res = decoder.raw_decode(result.group(1)[:-1])[0]
         response = self._get_item(res, item_response)
         if response is None:
             self._check_video_is_private(res.get("args"))
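
Unlike json.loads, JSONDecoder.raw_decode stops after the first complete JSON value and returns it along with the end offset, so trailing data (for example, extra JSON lines after the ytInitialData object) no longer raises. A quick standard-library illustration:

    import json

    payload = '{"a": 1}\n{"b": 2}'
    obj, end = json.JSONDecoder().raw_decode(payload)
    print(obj, end)    # {'a': 1} 8
    # json.loads(payload) would raise JSONDecodeError: Extra data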

View File

@@ -1,41 +0,0 @@
from pytchat.tool.mining import parser
import pytchat.config as config
import httpx
import json
from pytchat.paramgen import arcparam_mining as arcparam


def test_arcparam_e(mocker):
    try:
        arcparam.getparam("01234567890", -1)
        assert False
    except ValueError:
        assert True


def test_arcparam_0(mocker):
    param = arcparam.getparam("01234567890", 0)
    assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"


def test_arcparam_1(mocker):
    param = arcparam.getparam("01234567890", seektime=100000)
    print(param)
    assert param == "op2w0wQzGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABWgUQgMLXL2AEcgIIAXgB"


def test_arcparam_2(mocker):
    param = arcparam.getparam("PZz9NB0-Z64", 1)
    url = f"https://www.youtube.com/live_chat_replay?continuation={param}&playerOffsetMs=1000&pbj=1"
    resp = httpx.Client(http2=True).get(url, headers=config.headers)
    jsn = json.loads(resp.text)
    _, chatdata = parser.parse(jsn[1])
    test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatPaidMessageRenderer"]["id"]
    print(test_id)
    assert test_id == "ChwKGkNKSGE0YnFJeWVBQ0ZWcUF3Z0VkdGIwRm9R"


def test_arcparam_3(mocker):
    param = arcparam.getparam("01234567890")
    assert param == "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"

View File

@@ -1,7 +1,6 @@
 from json.decoder import JSONDecodeError
 from pytchat.tool.videoinfo import VideoInfo
 from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
-from pytchat import util


 def _open_file(path):
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
 def test_live_page(mocker):
     _set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
     info = VideoInfo('__test_id__')
-    '''live page :duration = 0'''
+    '''live page: duration==0'''
     assert info.get_duration() == 0
     assert info.video_id == '__test_id__'
     assert info.get_channel_name() == 'BGM channel'
@@ -88,3 +87,15 @@ def test_pattern_unmatch(mocker):
         assert False
     except PatternUnmatchError:
         assert True
+
+
+def test_extradata_handling(mocker):
+    '''Test case the extracted data are JSON lines.'''
+    _set_test_data(
+        'tests/testdata/videoinfo/extradata_page.txt', mocker)
+    try:
+        _ = VideoInfo('__test_id__')
+        assert True
+    except JSONDecodeError as e:
+        print(e.doc)
+        assert False
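
The failure branch prints e.doc, which on a JSONDecodeError is the full source string handed to the parser, so a failing run shows the offending page data in the test output:

    import json

    try:
        json.loads('{"a": 1}trailing')
    except json.JSONDecodeError as e:
        print(e.msg)   # Extra data
        print(e.doc)   # {"a": 1}trailing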

File diff suppressed because one or more lines are too long