Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e829a25d4 | ||
|
|
15132a9bb8 | ||
|
|
64ace9dad6 | ||
|
|
9a2e96d3a0 | ||
|
|
a3695a59b8 | ||
|
|
bc8655ed62 | ||
|
|
3bdc465740 | ||
|
|
235d6b7212 | ||
|
|
9f0754da57 | ||
|
|
306b0a4564 | ||
|
|
1c49387f1a | ||
|
|
300d96e56c | ||
|
|
0e301f48a8 |
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.1.5'
|
__version__ = '0.1.8'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -62,23 +62,36 @@ def main():
|
|||||||
|
|
||||||
print(f" output path: {path.resolve()}")
|
print(f" output path: {path.resolve()}")
|
||||||
duration = info.get_duration()
|
duration = info.get_duration()
|
||||||
pbar = ProgressBar(duration)
|
pbar = ProgressBar(total=(duration * 1000) / 0.99, status="Extracting")
|
||||||
ex = Extractor(video_id,
|
ex = Extractor(video_id,
|
||||||
processor=HTMLArchiver(Arguments().output + video_id + '.html'),
|
|
||||||
callback=pbar._disp,
|
callback=pbar._disp,
|
||||||
div=10)
|
div=10)
|
||||||
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
|
||||||
ex.extract()
|
data = ex.extract()
|
||||||
|
if data == []:
|
||||||
|
return False
|
||||||
|
pbar.reset("#", "=", total=len(data), status="Rendering ")
|
||||||
|
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
|
||||||
|
processor.process(
|
||||||
|
[{'video_id': None,
|
||||||
|
'timeout': 1,
|
||||||
|
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
||||||
|
)
|
||||||
|
processor.finalize()
|
||||||
|
pbar.reset('#', '#', status='Completed ')
|
||||||
pbar.close()
|
pbar.close()
|
||||||
|
print()
|
||||||
if pbar.is_cancelled():
|
if pbar.is_cancelled():
|
||||||
print("\nThe extraction process has been discontinued.\n")
|
print("\nThe extraction process has been discontinued.\n")
|
||||||
return
|
return False
|
||||||
print("\nThe extraction process has been completed.\n")
|
return True
|
||||||
|
|
||||||
except InvalidVideoIdException:
|
except InvalidVideoIdException:
|
||||||
print("Invalid Video ID or URL:", video_id)
|
print("Invalid Video ID or URL:", video_id)
|
||||||
except (TypeError, NoContents) as e:
|
except TypeError as e:
|
||||||
|
|
||||||
print(e.with_traceback())
|
print(e.with_traceback())
|
||||||
|
except NoContents as e:
|
||||||
|
print(e)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
print("The specified directory does not exist.:{}".format(Arguments().output))
|
||||||
except JSONDecodeError as e:
|
except JSONDecodeError as e:
|
||||||
|
|||||||
@@ -6,33 +6,46 @@ https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
|||||||
'''
|
'''
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
ROT = ['\u25F4', '\u25F5', '\u25F6', '\u25F7']
|
||||||
|
|
||||||
|
|
||||||
class ProgressBar:
|
class ProgressBar:
|
||||||
def __init__(self, duration):
|
def __init__(self, total, status):
|
||||||
self._duration = duration
|
|
||||||
self._count = 0
|
|
||||||
self._bar_len = 60
|
self._bar_len = 60
|
||||||
self._cancelled = False
|
self._cancelled = False
|
||||||
|
self.reset(total=total, status=status)
|
||||||
|
self._blinker = 0
|
||||||
|
|
||||||
|
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
||||||
|
self._symbol_done = symbol_done
|
||||||
|
self._symbol_space = symbol_space
|
||||||
|
self._total = total
|
||||||
|
self._status = status
|
||||||
|
self._count = 0
|
||||||
|
|
||||||
def _disp(self, _, fetched):
|
def _disp(self, _, fetched):
|
||||||
self._progress(fetched / 1000, self._duration)
|
self._progress(fetched, self._total)
|
||||||
|
|
||||||
def _progress(self, fillin, total, status=''):
|
def _progress(self, fillin, total):
|
||||||
if total == 0 or self._cancelled:
|
if total == 0 or self._cancelled:
|
||||||
return
|
return
|
||||||
self._count += fillin
|
self._count += fillin
|
||||||
filled_len = int(round(self._bar_len * self._count / float(total)))
|
filled_len = int(round(self._bar_len * self._count / float(total)))
|
||||||
percents = round(100.0 * self._count / float(total), 1)
|
percents = round(100.0 * self._count / float(total), 1)
|
||||||
|
if percents > 100:
|
||||||
|
percents = 100.0
|
||||||
if filled_len > self._bar_len:
|
if filled_len > self._bar_len:
|
||||||
filled_len = self._bar_len
|
filled_len = self._bar_len
|
||||||
percents = 100
|
|
||||||
bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
|
bar = self._symbol_done * filled_len + \
|
||||||
sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
|
self._symbol_space * (self._bar_len - filled_len)
|
||||||
|
sys.stdout.write(' [%s] %s%s ...%s %s \r' % (bar, percents, '%', self._status, ROT[self._blinker % 4]))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
self._blinker += 1
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if not self._cancelled:
|
if not self._cancelled:
|
||||||
self._progress(self._duration, self._duration)
|
self._progress(self._total, self._total)
|
||||||
|
|
||||||
def cancel(self):
|
def cancel(self):
|
||||||
self._cancelled = True
|
self._cancelled = True
|
||||||
|
|||||||
@@ -43,20 +43,21 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
'''
|
'''
|
||||||
HTMLArchiver saves chat data as HTML table format.
|
HTMLArchiver saves chat data as HTML table format.
|
||||||
'''
|
'''
|
||||||
def __init__(self, save_path):
|
def __init__(self, save_path, callback):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.save_path = self._checkpath(save_path)
|
self.save_path = self._checkpath(save_path)
|
||||||
self.processor = DefaultProcessor()
|
self.processor = DefaultProcessor()
|
||||||
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
self.emoji_table = {} # table for custom emojis. key: emoji_id, value: base64 encoded image binary.
|
||||||
self.header = [HEADER_HTML]
|
self.header = [HEADER_HTML]
|
||||||
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
|
||||||
|
self.callback = callback
|
||||||
|
|
||||||
def _checkpath(self, filepath):
|
def _checkpath(self, filepath):
|
||||||
splitter = os.path.splitext(os.path.basename(filepath))
|
splitter = os.path.splitext(os.path.basename(filepath))
|
||||||
body = splitter[0]
|
body = splitter[0]
|
||||||
extention = splitter[1]
|
extention = splitter[1]
|
||||||
newpath = filepath
|
newpath = filepath
|
||||||
counter = 0
|
counter = 1
|
||||||
while os.path.exists(newpath):
|
while os.path.exists(newpath):
|
||||||
match = re.search(PATTERN, body)
|
match = re.search(PATTERN, body)
|
||||||
if match:
|
if match:
|
||||||
@@ -80,8 +81,9 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
"""
|
"""
|
||||||
if chat_components is None or len(chat_components) == 0:
|
if chat_components is None or len(chat_components) == 0:
|
||||||
return
|
return
|
||||||
|
for c in self.processor.process(chat_components).items:
|
||||||
self.body.extend(
|
self.body.extend(
|
||||||
(self._parse_html_line((
|
self._parse_html_line((
|
||||||
c.datetime,
|
c.datetime,
|
||||||
c.elapsedTime,
|
c.elapsedTime,
|
||||||
c.author.name,
|
c.author.name,
|
||||||
@@ -89,8 +91,9 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
c.amountString,
|
c.amountString,
|
||||||
c.author.type,
|
c.author.type,
|
||||||
c.author.channelId)
|
c.author.channelId)
|
||||||
) for c in self.processor.process(chat_components).items)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
self.callback(None, 1)
|
||||||
|
|
||||||
def _parse_html_line(self, raw_line):
|
def _parse_html_line(self, raw_line):
|
||||||
return ''.join(('<tr>',
|
return ''.join(('<tr>',
|
||||||
@@ -131,7 +134,7 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
|
|
||||||
def finalize(self):
|
def finalize(self):
|
||||||
self.header.extend([self._create_styles(), '</head>\n'])
|
self.header.extend([self._create_styles(), '</head>\n'])
|
||||||
self.body.extend(['</table>\n</body>'])
|
self.body.extend(['</table>\n</body>\n</html>'])
|
||||||
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
with open(self.save_path, mode='a', encoding='utf-8') as f:
|
||||||
f.writelines(self.header)
|
f.writelines(self.header)
|
||||||
f.writelines(self.body)
|
f.writelines(self.body)
|
||||||
|
|||||||
@@ -93,7 +93,8 @@ class VideoInfo:
|
|||||||
result = re.search(pattern, text)
|
result = re.search(pattern, text)
|
||||||
if result is None:
|
if result is None:
|
||||||
raise PatternUnmatchError(text)
|
raise PatternUnmatchError(text)
|
||||||
res = json.loads(result.group(1)[:-1])
|
decoder = json.JSONDecoder()
|
||||||
|
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
||||||
response = self._get_item(res, item_response)
|
response = self._get_item(res, item_response)
|
||||||
if response is None:
|
if response is None:
|
||||||
self._check_video_is_private(res.get("args"))
|
self._check_video_is_private(res.get("args"))
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
httpx==0.14.1
|
httpx[http2]==0.14.1
|
||||||
protobuf==3.13.0
|
protobuf==3.13.0
|
||||||
pytz
|
pytz
|
||||||
urllib3
|
urllib3
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
from pytchat.tool.videoinfo import VideoInfo
|
from pytchat.tool.videoinfo import VideoInfo
|
||||||
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
|
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
|
||||||
from pytchat import util
|
|
||||||
|
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
|
|||||||
def test_live_page(mocker):
|
def test_live_page(mocker):
|
||||||
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
|
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
|
||||||
info = VideoInfo('__test_id__')
|
info = VideoInfo('__test_id__')
|
||||||
'''live page :duration = 0'''
|
'''live page: duration==0'''
|
||||||
assert info.get_duration() == 0
|
assert info.get_duration() == 0
|
||||||
assert info.video_id == '__test_id__'
|
assert info.video_id == '__test_id__'
|
||||||
assert info.get_channel_name() == 'BGM channel'
|
assert info.get_channel_name() == 'BGM channel'
|
||||||
@@ -88,3 +87,15 @@ def test_pattern_unmatch(mocker):
|
|||||||
assert False
|
assert False
|
||||||
except PatternUnmatchError:
|
except PatternUnmatchError:
|
||||||
assert True
|
assert True
|
||||||
|
|
||||||
|
|
||||||
|
def test_extradata_handling(mocker):
|
||||||
|
'''Test the case where the extracted data are JSON lines.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/extradata_page.txt', mocker)
|
||||||
|
try:
|
||||||
|
_ = VideoInfo('__test_id__')
|
||||||
|
assert True
|
||||||
|
except JSONDecodeError as e:
|
||||||
|
print(e.doc)
|
||||||
|
assert False
|
||||||
|
|||||||
15
tests/testdata/videoinfo/extradata_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/extradata_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user