Compare commits

...

9 Commits

Author SHA1 Message Date
taizan-hokuto
3243d69d7a Merge branch 'hotfix/json_decode_error' 2020-03-14 09:43:37 +09:00
taizan-hokuto
6e1b735ebc Increment version 2020-03-14 09:42:53 +09:00
taizan-hokuto
c54481dad5 Add header html and show progress 2020-03-14 09:26:28 +09:00
taizan-hokuto
78604c84d4 Fix testdata path separator 2020-03-14 08:16:19 +09:00
taizan-hokuto
21d93613a2 Handling JSONDecodeError 2020-03-14 08:00:31 +09:00
taizan-hokuto
5f50598f79 Merge branch 'hotfix/argparse' 2020-03-10 01:58:24 +09:00
taizan-hokuto
5e8c438c6b Increment version 2020-03-10 01:57:55 +09:00
taizan-hokuto
23e47f6fb0 Fix parsing video_id which starts with '-' 2020-03-10 01:57:01 +09:00
taizan-hokuto
74dfe0a612 Modify requirements.txt 2020-03-10 01:06:36 +09:00
8 changed files with 69 additions and 26 deletions

View File

@@ -7,10 +7,10 @@ pytchat is a python library for fetching youtube live chat.
pytchat is a python library for fetching youtube live chat pytchat is a python library for fetching youtube live chat
without using youtube api, Selenium or BeautifulSoup. without using youtube api, Selenium or BeautifulSoup.
pytchatはAPIを使わずにYouTubeチャットを取得するための軽量pythonライブラリです。 pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
Other features: Other features:
+ Customizable chat data processors including youtube api compatible one. + Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
+ Available on asyncio context. + Available on asyncio context.
+ Quick fetching of initial chat data by generating continuation params + Quick fetching of initial chat data by generating continuation params
instead of web scraping. instead of web scraping.

View File

@@ -2,7 +2,7 @@
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup. pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.0.6.4' __version__ = '0.0.6.6'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -22,9 +22,10 @@ def main():
# Arguments # Arguments
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}') parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str, parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
help='Video IDs separated by commas without space') help='Video IDs separated by commas without space.\n'
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/")', default='./') help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Settings version') help='Settings version')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
@@ -35,17 +36,25 @@ def main():
# Extractor # Extractor
if Arguments().video_ids: if Arguments().video_ids:
for video_id in Arguments().video_ids: for video_id in Arguments().video_ids:
if '[' in video_id:
video_id = video_id.replace('[','').replace(']','')
try: try:
info = VideoInfo(video_id) info = VideoInfo(video_id)
print(f"Extracting...\n" print(f"Extracting...\n"
f" video_id: {video_id}\n" f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n" f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}") f" title: {info.get_title()}")
path = Path(Arguments().output+video_id+'.html')
print(f"output path: {path.resolve()}")
Extractor(video_id, Extractor(video_id,
processor = HTMLArchiver(Arguments().output+video_id+'.html') processor = HTMLArchiver(Arguments().output+video_id+'.html'),
callback = _disp_progress
).extract() ).extract()
print("Extraction end.\n") print("\nExtraction end.\n")
except (InvalidVideoIdException, NoContentsException) as e: except (InvalidVideoIdException, NoContentsException) as e:
print(e) print(e)
return return
parser.print_help() parser.print_help()
def _disp_progress(a,b):
print('.',end="",flush=True)

View File

@@ -1,46 +1,52 @@
class ChatParseException(Exception): class ChatParseException(Exception):
''' '''
チャットデータをパースするライブラリが投げる例外の基底クラス Base exception thrown by the parser
''' '''
pass pass
class NoYtinitialdataException(ChatParseException): class NoYtinitialdataException(ChatParseException):
''' '''
配信ページ内にチャットデータurlが見つからないときに投げる例外 Thrown when the video is not found.
''' '''
pass pass
class ResponseContextError(ChatParseException): class ResponseContextError(ChatParseException):
''' '''
配信ページでチャットデータ無効の時に投げる例外 Thrown when chat data is invalid.
''' '''
pass pass
class NoLivechatRendererException(ChatParseException): class NoLivechatRendererException(ChatParseException):
''' '''
チャットデータのJSON中にlivechatRendererがない時に投げる例外 Thrown when livechatRenderer is missing in JSON.
''' '''
pass pass
class NoContentsException(ChatParseException): class NoContentsException(ChatParseException):
''' '''
チャットデータのJSON中にContinuationContentsがない時に投げる例外 Thrown when ContinuationContents is missing in JSON.
''' '''
pass pass
class NoContinuationsException(ChatParseException): class NoContinuationsException(ChatParseException):
''' '''
チャットデータのContinuationContents中にcontinuationがない時に投げる例外 Thrown when continuation is missing in ContinuationContents.
''' '''
pass pass
class IllegalFunctionCall(Exception): class IllegalFunctionCall(Exception):
''' '''
set_callback()を実行済みにもかかわらず Thrown when get() is called even though
get()を呼び出した場合の例外 set_callback() has already been executed.
''' '''
pass pass
class InvalidVideoIdException(Exception): class InvalidVideoIdException(Exception):
'''
Thrown when the video_id does not exist (VideoInfo).
'''
pass
class UnknownConnectionError(Exception):
pass pass

View File

@@ -8,6 +8,11 @@ PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
fmt_headers = ['datetime','elapsed','authorName','message','superchat' fmt_headers = ['datetime','elapsed','authorName','message','superchat'
,'type','authorChannel'] ,'type','authorChannel']
HEADER_HTML = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
'''
class HTMLArchiver(ChatProcessor): class HTMLArchiver(ChatProcessor):
''' '''
HtmlArchiver saves chat data as HTML table format. HtmlArchiver saves chat data as HTML table format.
@@ -17,6 +22,7 @@ class HTMLArchiver(ChatProcessor):
super().__init__() super().__init__()
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
with open(self.save_path, mode='a', encoding = 'utf-8') as f: with open(self.save_path, mode='a', encoding = 'utf-8') as f:
f.write(HEADER_HTML)
f.write('<table border="1" style="border-collapse: collapse">') f.write('<table border="1" style="border-collapse: collapse">')
f.writelines(self._parse_html_header(fmt_headers)) f.writelines(self._parse_html_header(fmt_headers))
self.processor = DefaultProcessor() self.processor = DefaultProcessor()

View File

@@ -7,12 +7,15 @@ from . worker import ExtractWorker
from . patch import Patch from . patch import Patch
from ... import config from ... import config
from ... paramgen import arcparam from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from json import JSONDecodeError
from urllib.parse import quote from urllib.parse import quote
headers = config.headers headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation=" "get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
def _split(start, end, count, min_interval_sec = 120): def _split(start, end, count, min_interval_sec = 120):
""" """
@@ -57,9 +60,18 @@ def ready_blocks(video_id, duration, div, callback):
async def _create_block(session, video_id, seektime, callback): async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime = seektime) continuation = arcparam.getparam(video_id, seektime = seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try :
async with session.get(url, headers = headers) as resp: async with session.get(url, headers = headers) as resp:
text = await resp.text() text = await resp.text()
next_continuation, actions = parser.parse(json.loads(text)) next_continuation, actions = parser.parse(json.loads(text))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions: if actions:
first = parser.get_offset(actions[0]) first = parser.get_offset(actions[0])
last = parser.get_offset(actions[-1]) last = parser.get_offset(actions[-1])
@@ -71,6 +83,7 @@ def ready_blocks(video_id, duration, div, callback):
first = first, first = first,
last = last last = last
) )
""" """
fetch initial blocks. fetch initial blocks.
""" """
@@ -95,9 +108,18 @@ def fetch_patch(callback, blocks, video_id):
async def _fetch(continuation,session) -> Patch: async def _fetch(continuation,session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
for _ in range(MAX_RETRY_COUNT):
try:
async with session.get(url,headers = config.headers) as resp: async with session.get(url,headers = config.headers) as resp:
chat_json = await resp.text() chat_json = await resp.text()
continuation, actions = parser.parse(json.loads(chat_json)) continuation, actions = parser.parse(json.loads(chat_json))
break
except JSONDecodeError:
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
if actions: if actions:
last = parser.get_offset(actions[-1]) last = parser.get_offset(actions[-1])
first = parser.get_offset(actions[0]) first = parser.get_offset(actions[0])
@@ -105,6 +127,7 @@ def fetch_patch(callback, blocks, video_id):
callback(actions, last - first) callback(actions, last - first)
return Patch(actions, continuation, first, last) return Patch(actions, continuation, first, last)
return Patch(continuation = continuation) return Patch(continuation = continuation)
""" """
allocate workers and assign blocks. allocate workers and assign blocks.
""" """

View File

@@ -1,5 +1,4 @@
aiohttp aiohttp
argumentparser
pytz pytz
requests requests
urllib3 urllib3

View File

@@ -36,7 +36,7 @@ def test_process_0():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\superchat_0.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/superchat_0.json")
} }
assert SuperchatCalculator().process([chat_component])=={'': 6800.0, '': 2.0} assert SuperchatCalculator().process([chat_component])=={'': 6800.0, '': 2.0}
@@ -47,7 +47,7 @@ def test_process_1():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\text_only.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/text_only.json")
} }
assert SuperchatCalculator().process([chat_component])=={} assert SuperchatCalculator().process([chat_component])=={}
@@ -59,7 +59,7 @@ def test_process_2():
chat_component = { chat_component = {
'video_id':'', 'video_id':'',
'timeout':10, 'timeout':10,
'chatdata':load_chatdata(r"tests\testdata\calculator\replay_end.json") 'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
} }
assert False assert False
SuperchatCalculator().process([chat_component]) SuperchatCalculator().process([chat_component])