Compare commits

..

13 Commits

Author SHA1 Message Date
taizan-hokuto
4e829a25d4 Merge branch 'release/v0.1.8' 2020-09-06 18:27:57 +09:00
taizan-hokuto
15132a9bb8 Increment version 2020-09-06 18:27:08 +09:00
taizan-hokuto
64ace9dad6 Update progress bar 2020-09-06 18:25:16 +09:00
taizan-hokuto
9a2e96d3a0 Merge tag 'extract_vid' into develop
v0.1.7
2020-09-04 01:55:42 +09:00
taizan-hokuto
a3695a59b8 Merge branch 'hotfix/extract_vid' 2020-09-04 01:55:41 +09:00
taizan-hokuto
bc8655ed62 Increment version 2020-09-04 01:53:14 +09:00
taizan-hokuto
3bdc465740 Devide exception handling 2020-09-04 01:52:53 +09:00
taizan-hokuto
235d6b7212 Fix extract video info 2020-09-04 01:46:10 +09:00
taizan-hokuto
9f0754da57 Merge tag 'http2' into develop
v0.1.6
2020-09-03 21:27:48 +09:00
taizan-hokuto
306b0a4564 Merge branch 'hotfix/http2' 2020-09-03 21:27:48 +09:00
taizan-hokuto
1c49387f1a Increment version 2020-09-03 21:24:42 +09:00
taizan-hokuto
300d96e56c Fix requirements.txt 2020-09-03 21:24:21 +09:00
taizan-hokuto
0e301f48a8 Merge tag 'v0.1.5' into develop
v0.1.5
2020-09-03 20:16:56 +09:00
8 changed files with 92 additions and 36 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.1.5' __version__ = '0.1.8'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -62,23 +62,36 @@ def main():
print(f" output path: {path.resolve()}") print(f" output path: {path.resolve()}")
duration = info.get_duration() duration = info.get_duration()
pbar = ProgressBar(duration) pbar = ProgressBar(total=(duration * 1000) / 0.99, status="Extracting")
ex = Extractor(video_id, ex = Extractor(video_id,
processor=HTMLArchiver(Arguments().output + video_id + '.html'),
callback=pbar._disp, callback=pbar._disp,
div=10) div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar))) signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
ex.extract() data = ex.extract()
if data == []:
return False
pbar.reset("#", "=", total=len(data), status="Rendering ")
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
processor.process(
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
processor.finalize()
pbar.reset('#', '#', status='Completed ')
pbar.close() pbar.close()
print()
if pbar.is_cancelled(): if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n") print("\nThe extraction process has been discontinued.\n")
return return False
print("\nThe extraction process has been completed.\n") return True
except InvalidVideoIdException: except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id) print("Invalid Video ID or URL:", video_id)
except (TypeError, NoContents) as e: except TypeError as e:
print(e.with_traceback()) print(e.with_traceback())
except NoContents as e:
print(e)
except FileNotFoundError: except FileNotFoundError:
print("The specified directory does not exist.:{}".format(Arguments().output)) print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e: except JSONDecodeError as e:

View File

@@ -6,33 +6,46 @@ https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
''' '''
import sys import sys
ROT = ['\u25F4', '\u25F5', '\u25F6', '\u25F7']
class ProgressBar: class ProgressBar:
def __init__(self, duration): def __init__(self, total, status):
self._duration = duration
self._count = 0
self._bar_len = 60 self._bar_len = 60
self._cancelled = False self._cancelled = False
self.reset(total=total, status=status)
self._blinker = 0
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
self._symbol_done = symbol_done
self._symbol_space = symbol_space
self._total = total
self._status = status
self._count = 0
def _disp(self, _, fetched): def _disp(self, _, fetched):
self._progress(fetched / 1000, self._duration) self._progress(fetched, self._total)
def _progress(self, fillin, total, status=''): def _progress(self, fillin, total):
if total == 0 or self._cancelled: if total == 0 or self._cancelled:
return return
self._count += fillin self._count += fillin
filled_len = int(round(self._bar_len * self._count / float(total))) filled_len = int(round(self._bar_len * self._count / float(total)))
percents = round(100.0 * self._count / float(total), 1) percents = round(100.0 * self._count / float(total), 1)
if percents > 100:
percents = 100.0
if filled_len > self._bar_len: if filled_len > self._bar_len:
filled_len = self._bar_len filled_len = self._bar_len
percents = 100
bar = '=' * filled_len + ' ' * (self._bar_len - filled_len) bar = self._symbol_done * filled_len + \
sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status)) self._symbol_space * (self._bar_len - filled_len)
sys.stdout.write(' [%s] %s%s ...%s %s \r' % (bar, percents, '%', self._status, ROT[self._blinker % 4]))
sys.stdout.flush() sys.stdout.flush()
self._blinker += 1
def close(self): def close(self):
if not self._cancelled: if not self._cancelled:
self._progress(self._duration, self._duration) self._progress(self._total, self._total)
def cancel(self): def cancel(self):
self._cancelled = True self._cancelled = True

View File

@@ -43,20 +43,21 @@ class HTMLArchiver(ChatProcessor):
''' '''
HTMLArchiver saves chat data as HTML table format. HTMLArchiver saves chat data as HTML table format.
''' '''
def __init__(self, save_path): def __init__(self, save_path, callback):
super().__init__() super().__init__()
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor() self.processor = DefaultProcessor()
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary. self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML] self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)] self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
self.callback = callback
def _checkpath(self, filepath): def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath)) splitter = os.path.splitext(os.path.basename(filepath))
body = splitter[0] body = splitter[0]
extention = splitter[1] extention = splitter[1]
newpath = filepath newpath = filepath
counter = 0 counter = 1
while os.path.exists(newpath): while os.path.exists(newpath):
match = re.search(PATTERN, body) match = re.search(PATTERN, body)
if match: if match:
@@ -80,8 +81,9 @@ class HTMLArchiver(ChatProcessor):
""" """
if chat_components is None or len(chat_components) == 0: if chat_components is None or len(chat_components) == 0:
return return
for c in self.processor.process(chat_components).items:
self.body.extend( self.body.extend(
(self._parse_html_line(( self._parse_html_line((
c.datetime, c.datetime,
c.elapsedTime, c.elapsedTime,
c.author.name, c.author.name,
@@ -89,8 +91,9 @@ class HTMLArchiver(ChatProcessor):
c.amountString, c.amountString,
c.author.type, c.author.type,
c.author.channelId) c.author.channelId)
) for c in self.processor.process(chat_components).items)
) )
)
self.callback(None, 1)
def _parse_html_line(self, raw_line): def _parse_html_line(self, raw_line):
return ''.join(('<tr>', return ''.join(('<tr>',
@@ -131,7 +134,7 @@ class HTMLArchiver(ChatProcessor):
def finalize(self): def finalize(self):
self.header.extend([self._create_styles(), '</head>\n']) self.header.extend([self._create_styles(), '</head>\n'])
self.body.extend(['</table>\n</body>']) self.body.extend(['</table>\n</body>\n</html>'])
with open(self.save_path, mode='a', encoding='utf-8') as f: with open(self.save_path, mode='a', encoding='utf-8') as f:
f.writelines(self.header) f.writelines(self.header)
f.writelines(self.body) f.writelines(self.body)

View File

@@ -93,7 +93,8 @@ class VideoInfo:
result = re.search(pattern, text) result = re.search(pattern, text)
if result is None: if result is None:
raise PatternUnmatchError(text) raise PatternUnmatchError(text)
res = json.loads(result.group(1)[:-1]) decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0]
response = self._get_item(res, item_response) response = self._get_item(res, item_response)
if response is None: if response is None:
self._check_video_is_private(res.get("args")) self._check_video_is_private(res.get("args"))

View File

@@ -1,4 +1,4 @@
httpx==0.14.1 httpx[http2]==0.14.1
protobuf==3.13.0 protobuf==3.13.0
pytz pytz
urllib3 urllib3

View File

@@ -1,7 +1,6 @@
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pytchat.tool.videoinfo import VideoInfo from pytchat.tool.videoinfo import VideoInfo
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
from pytchat import util
def _open_file(path): def _open_file(path):
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
def test_live_page(mocker): def test_live_page(mocker):
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker) _set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
info = VideoInfo('__test_id__') info = VideoInfo('__test_id__')
'''live page :duration = 0''' '''live page: duration==0'''
assert info.get_duration() == 0 assert info.get_duration() == 0
assert info.video_id == '__test_id__' assert info.video_id == '__test_id__'
assert info.get_channel_name() == 'BGM channel' assert info.get_channel_name() == 'BGM channel'
@@ -88,3 +87,15 @@ def test_pattern_unmatch(mocker):
assert False assert False
except PatternUnmatchError: except PatternUnmatchError:
assert True assert True
def test_extradata_handling(mocker):
'''Test case the extracted data are JSON lines.'''
_set_test_data(
'tests/testdata/videoinfo/extradata_page.txt', mocker)
try:
_ = VideoInfo('__test_id__')
assert True
except JSONDecodeError as e:
print(e.doc)
assert False

File diff suppressed because one or more lines are too long