Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
480c9e15b8 | ||
|
|
35aa7636f6 | ||
|
|
8fee67c2d4 | ||
|
|
d3f1643a40 | ||
|
|
eb29f27493 | ||
|
|
8adf75ab83 | ||
|
|
2e05803d75 | ||
|
|
f16c0ee73a | ||
|
|
a338f2b782 |
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.2.0'
|
__version__ = '0.2.2'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import signal
|
import signal
|
||||||
|
import time
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from .arguments import Arguments
|
from .arguments import Arguments
|
||||||
|
from .progressbar import ProgressBar
|
||||||
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
|
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
|
||||||
from .. processors.html_archiver import HTMLArchiver
|
from .. processors.html_archiver import HTMLArchiver
|
||||||
from .. tool.extract.extractor import Extractor
|
from .. tool.extract.extractor import Extractor
|
||||||
@@ -32,18 +33,12 @@ def main():
|
|||||||
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||||
help='Output directory (end with "/"). default="./"', default='./')
|
help='Output directory (end with "/"). default="./"', default='./')
|
||||||
parser.add_argument(f'--{Arguments.Name.PBAR}', action='store_true',
|
|
||||||
help='Display rich progress bar')
|
|
||||||
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
|
||||||
help='Save error data when error occurs(".dat" file)')
|
help='Save error data when error occurs(".dat" file)')
|
||||||
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
help='Show version')
|
help='Show version')
|
||||||
Arguments(parser.parse_args().__dict__)
|
Arguments(parser.parse_args().__dict__)
|
||||||
|
|
||||||
if Arguments().pbar:
|
|
||||||
from .progressbar_rich import ProgressBar
|
|
||||||
else:
|
|
||||||
from .progressbar_simple import ProgressBar
|
|
||||||
if Arguments().print_version:
|
if Arguments().print_version:
|
||||||
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
|
||||||
return
|
return
|
||||||
@@ -52,7 +47,7 @@ def main():
|
|||||||
if not Arguments().video_ids:
|
if not Arguments().video_ids:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
return
|
return
|
||||||
for video_id in Arguments().video_ids:
|
for counter, video_id in enumerate(Arguments().video_ids):
|
||||||
if '[' in video_id:
|
if '[' in video_id:
|
||||||
video_id = video_id.replace('[', '').replace(']', '')
|
video_id = video_id.replace('[', '').replace(']', '')
|
||||||
try:
|
try:
|
||||||
@@ -61,8 +56,24 @@ def main():
|
|||||||
path = Path(Arguments().output + video_id + '.html')
|
path = Path(Arguments().output + video_id + '.html')
|
||||||
else:
|
else:
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
|
err = None
|
||||||
|
for _ in range(3): # retry 3 times
|
||||||
|
try:
|
||||||
info = VideoInfo(video_id)
|
info = VideoInfo(video_id)
|
||||||
print(f"Extracting...\n"
|
break
|
||||||
|
except (PatternUnmatchError, JSONDecodeError, InvalidVideoIdException) as e:
|
||||||
|
err = e
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print("Cannot parse video information.:{}".format(video_id))
|
||||||
|
if Arguments().save_error_data:
|
||||||
|
util.save(err.doc, "ERR", ".dat")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if len(Arguments().video_ids) > 1:
|
||||||
|
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
|
||||||
|
print(f"\n"
|
||||||
f" video_id: {video_id}\n"
|
f" video_id: {video_id}\n"
|
||||||
f" channel: {info.get_channel_name()}\n"
|
f" channel: {info.get_channel_name()}\n"
|
||||||
f" title: {info.get_title()}")
|
f" title: {info.get_title()}")
|
||||||
@@ -77,10 +88,7 @@ def main():
|
|||||||
data = ex.extract()
|
data = ex.extract()
|
||||||
if data == []:
|
if data == []:
|
||||||
return False
|
return False
|
||||||
if Arguments().pbar:
|
|
||||||
pbar.reset("#", "=", total=len(data), status="Rendering ")
|
pbar.reset("#", "=", total=len(data), status="Rendering ")
|
||||||
else:
|
|
||||||
pbar.reset("=", "", total=len(data), status="Rendering ")
|
|
||||||
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
|
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
|
||||||
processor.process(
|
processor.process(
|
||||||
[{'video_id': None,
|
[{'video_id': None,
|
||||||
@@ -88,19 +96,11 @@ def main():
|
|||||||
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
|
||||||
)
|
)
|
||||||
processor.finalize()
|
processor.finalize()
|
||||||
if Arguments().pbar:
|
|
||||||
pbar.reset('#', '#', status='Completed ')
|
pbar.reset('#', '#', status='Completed ')
|
||||||
pbar.close()
|
pbar.close()
|
||||||
else:
|
|
||||||
pbar.close()
|
|
||||||
print("\nCompleted")
|
|
||||||
|
|
||||||
print()
|
print()
|
||||||
if pbar.is_cancelled():
|
if pbar.is_cancelled():
|
||||||
print("\nThe extraction process has been discontinued.\n")
|
print("\nThe extraction process has been discontinued.\n")
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
except InvalidVideoIdException:
|
except InvalidVideoIdException:
|
||||||
print("Invalid Video ID or URL:", video_id)
|
print("Invalid Video ID or URL:", video_id)
|
||||||
except NoContents as e:
|
except NoContents as e:
|
||||||
@@ -109,14 +109,9 @@ def main():
|
|||||||
print("The specified directory does not exist.:{}".format(Arguments().output))
|
print("The specified directory does not exist.:{}".format(Arguments().output))
|
||||||
except JSONDecodeError as e:
|
except JSONDecodeError as e:
|
||||||
print(e.msg)
|
print(e.msg)
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
print("JSONDecodeError.:{}".format(video_id))
|
||||||
if Arguments().save_error_data:
|
if Arguments().save_error_data:
|
||||||
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
|
||||||
except PatternUnmatchError as e:
|
|
||||||
print(e.msg)
|
|
||||||
print("Cannot parse video information.:{}".format(video_id))
|
|
||||||
if Arguments().save_error_data:
|
|
||||||
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ class Arguments(metaclass=Singleton):
|
|||||||
OUTPUT: str = 'output_dir'
|
OUTPUT: str = 'output_dir'
|
||||||
VIDEO_IDS: str = 'video_id'
|
VIDEO_IDS: str = 'video_id'
|
||||||
SAVE_ERROR_DATA: bool = 'save_error_data'
|
SAVE_ERROR_DATA: bool = 'save_error_data'
|
||||||
PBAR: bool ='pbar'
|
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||||
@@ -37,7 +36,7 @@ class Arguments(metaclass=Singleton):
|
|||||||
self.output: str = arguments[Arguments.Name.OUTPUT]
|
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||||
self.video_ids: List[int] = []
|
self.video_ids: List[int] = []
|
||||||
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
|
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
|
||||||
self.pbar: bool = arguments[Arguments.Name.PBAR]
|
|
||||||
# Videos
|
# Videos
|
||||||
if arguments[Arguments.Name.VIDEO_IDS]:
|
if arguments[Arguments.Name.VIDEO_IDS]:
|
||||||
self.video_ids = [video_id
|
self.video_ids = [video_id
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ vladignatyev/progress.py
|
|||||||
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
||||||
(MIT License)
|
(MIT License)
|
||||||
'''
|
'''
|
||||||
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@@ -15,6 +16,7 @@ class ProgressBar:
|
|||||||
self._blinker = 0
|
self._blinker = 0
|
||||||
|
|
||||||
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
||||||
|
self.con_width = shutil.get_terminal_size(fallback=(80, 24)).columns
|
||||||
self._symbol_done = symbol_done
|
self._symbol_done = symbol_done
|
||||||
self._symbol_space = symbol_space
|
self._symbol_space = symbol_space
|
||||||
self._total = total
|
self._total = total
|
||||||
@@ -37,7 +39,9 @@ class ProgressBar:
|
|||||||
|
|
||||||
bar = self._symbol_done * filled_len + \
|
bar = self._symbol_done * filled_len + \
|
||||||
self._symbol_space * (self._bar_len - filled_len)
|
self._symbol_space * (self._bar_len - filled_len)
|
||||||
sys.stdout.write(' [%s] %s%s ...%s \r' % (bar, percents, '%', self._status))
|
disp = f" [{bar}] {percents:>5.1f}% ...{self._status} "[:self.con_width - 1] + '\r'
|
||||||
|
|
||||||
|
sys.stdout.write(disp)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
self._blinker += 1
|
self._blinker += 1
|
||||||
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
'''
|
|
||||||
This code for this progress bar is based on
|
|
||||||
vladignatyev/progress.py
|
|
||||||
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
|
|
||||||
(MIT License)
|
|
||||||
'''
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
class ProgressBar:
|
|
||||||
def __init__(self, total, status):
|
|
||||||
self._bar_len = 60
|
|
||||||
self._cancelled = False
|
|
||||||
print(''.join([' ' * 10, '|', '-' * (self._bar_len), '|']), end="")
|
|
||||||
self.reset(total=total, status=status)
|
|
||||||
|
|
||||||
def reset(self, symbol_done="=", symbol_space=" ", total=100, status=''):
|
|
||||||
self._symbol_done = symbol_done
|
|
||||||
self._symbol_space = symbol_space
|
|
||||||
self._total = total
|
|
||||||
self._status = status
|
|
||||||
self._old_len = 0
|
|
||||||
self._count = 0
|
|
||||||
print()
|
|
||||||
print(f'{status:<11}', end='')
|
|
||||||
|
|
||||||
def _disp(self, _, fetched):
|
|
||||||
self._progress(fetched, self._total)
|
|
||||||
|
|
||||||
def _progress(self, fillin, total):
|
|
||||||
if total == 0 or self._cancelled:
|
|
||||||
return
|
|
||||||
self._count += fillin
|
|
||||||
filled_len = int(round(self._bar_len * self._count / float(total)))
|
|
||||||
if filled_len > self._bar_len:
|
|
||||||
filled_len = self._bar_len
|
|
||||||
print((filled_len - self._old_len) * self._symbol_done, end="")
|
|
||||||
sys.stdout.flush()
|
|
||||||
self._old_len = filled_len
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
if not self._cancelled:
|
|
||||||
self._progress(self._total, self._total)
|
|
||||||
|
|
||||||
def cancel(self):
|
|
||||||
self._cancelled = True
|
|
||||||
|
|
||||||
def is_cancelled(self):
|
|
||||||
return self._cancelled
|
|
||||||
@@ -38,7 +38,10 @@ class InvalidVideoIdException(Exception):
|
|||||||
'''
|
'''
|
||||||
Thrown when the video_id is not exist (VideoInfo).
|
Thrown when the video_id is not exist (VideoInfo).
|
||||||
'''
|
'''
|
||||||
pass
|
def __init__(self, doc):
|
||||||
|
self.msg = "InvalidVideoIdException"
|
||||||
|
self.doc = doc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class UnknownConnectionError(Exception):
|
class UnknownConnectionError(Exception):
|
||||||
@@ -47,7 +50,7 @@ class UnknownConnectionError(Exception):
|
|||||||
|
|
||||||
class RetryExceedMaxCount(Exception):
|
class RetryExceedMaxCount(Exception):
|
||||||
'''
|
'''
|
||||||
thrown when the number of retries exceeds the maximum value.
|
Thrown when the number of retries exceeds the maximum value.
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -66,13 +69,13 @@ class FailedExtractContinuation(ChatDataFinished):
|
|||||||
|
|
||||||
class VideoInfoParseError(Exception):
|
class VideoInfoParseError(Exception):
|
||||||
'''
|
'''
|
||||||
thrown when failed to parse video info
|
Base exception when parsing video info.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
class PatternUnmatchError(VideoInfoParseError):
|
class PatternUnmatchError(VideoInfoParseError):
|
||||||
'''
|
'''
|
||||||
thrown when failed to parse video info with unmatched pattern
|
Thrown when failed to parse video info with unmatched pattern.
|
||||||
'''
|
'''
|
||||||
def __init__(self, doc):
|
def __init__(self, doc):
|
||||||
self.msg = "PatternUnmatchError"
|
self.msg = "PatternUnmatchError"
|
||||||
|
|||||||
Reference in New Issue
Block a user