Compare commits

...

12 Commits

Author SHA1 Message Date
taizan-hokouto
3106b3e545 Merge branch 'hotfix/filepath' 2020-10-04 11:33:58 +09:00
taizan-hokouto
50816a661d Increment version 2020-10-04 11:30:07 +09:00
taizan-hokouto
6755bc8bb2 Make sure to pass fixed filepath to processor 2020-10-04 11:29:52 +09:00
taizan-hokouto
26be989b9b Merge branch 'hotfix/fix' 2020-10-04 10:32:53 +09:00
taizan-hokouto
73ad0a1f44 Increment version 2020-10-04 10:22:34 +09:00
taizan-hokouto
66b185ebf7 Fix constructing filepath 2020-10-04 10:20:14 +09:00
taizan_hokuto
71650c39f7 Merge branch 'hotfix/fix' 2020-10-03 22:42:48 +09:00
taizan_hokuto
488445c73b Increment version 2020-10-03 22:41:53 +09:00
taizan_hokuto
075e811efe Delete unnecessary code 2020-10-03 22:41:12 +09:00
taizan_hokuto
58d9bf7fdb Merge branch 'hotfix/pattern' 2020-10-03 22:35:46 +09:00
taizan_hokuto
b3e6275de7 Increment version 2020-10-03 22:35:22 +09:00
taizan_hokuto
748778f545 Fix pattern matching 2020-10-03 22:04:09 +09:00
5 changed files with 45 additions and 14 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.5'
__version__ = '0.3.0'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -56,10 +56,10 @@ def main():
try:
video_id = extract_video_id(video_id)
if os.path.exists(Arguments().output):
path = Path(Arguments().output + video_id + '.html')
else:
if not os.path.exists(Arguments().output):
raise FileNotFoundError
separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html')
err = None
for _ in range(3): # retry 3 times
try:
@@ -80,7 +80,7 @@ def main():
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
print(f" output path: {path.resolve()}")
print(f" output path: {path}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id,
@@ -91,7 +91,7 @@ def main():
if data == []:
return False
pbar.reset("#", "=", total=len(data), status="Rendering ")
processor = HTMLArchiver(Arguments().output + video_id + '.html', callback=pbar._disp)
processor = HTMLArchiver(path, callback=pbar._disp)
processor.process(
[{'video_id': None,
'timeout': 1,

View File

@@ -116,11 +116,12 @@ class HTMLArchiver(ChatProcessor):
def _encode_img(self, url):
err = None
for _ in range(3):
for _ in range(5):
try:
resp = httpx.get(url)
resp = httpx.get(url, timeout=30)
break
except (NetworkError, ReadTimeout) as e:
print("Network Error. retrying...")
err = e
time.sleep(3)
else:

View File

@@ -9,8 +9,8 @@ from ..util.extract_video_id import extract_video_id
headers = config.headers
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
item_channel_id = [
"videoDetails",
@@ -83,8 +83,16 @@ class VideoInfo:
def __init__(self, video_id):
self.video_id = extract_video_id(video_id)
text = self._get_page_text(self.video_id)
self._parse(text)
for _ in range(3):
try:
text = self._get_page_text(self.video_id)
self._parse(text)
break
except PatternUnmatchError:
time.sleep(2)
pass
else:
raise PatternUnmatchError("Pattern Unmatch")
def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}"
@@ -105,7 +113,7 @@ class VideoInfo:
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError(text)
raise PatternUnmatchError()
decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0]
response = self._get_item(res, item_response)

View File

@@ -1,8 +1,12 @@
import datetime
import httpx
import json
import datetime
import os
import re
from .. import config
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
def extract(url):
_session = httpx.Client(http2=True)
@@ -16,3 +20,21 @@ def save(data, filename, extention):
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
mode='w', encoding='utf-8') as f:
f.writelines(data)
def checkpath(filepath):
    """Return a variant of *filepath* that does not exist on disk yet.

    If nothing exists at *filepath* it is returned unchanged.  Otherwise a
    numeric suffix in parentheses is added to the file name (before the
    extension) and incremented until an unused name is found, e.g.
    ``video.html`` -> ``video(1).html`` -> ``video(2).html``.  When the name
    already ends with ``(n)``, counting continues from ``n + 1`` instead of
    stacking a second suffix.

    Args:
        filepath: Candidate output path.

    Returns:
        *filepath* itself when it is free, otherwise a string path in the
        same directory with a ``(n)`` suffix inserted before the extension.

    Note:
        The existence check and the later file creation by the caller are
        separate steps, so another process may still claim the returned name
        in between.
    """
    # Fast path: nothing to rename when the requested name is free.
    if not os.path.exists(filepath):
        return filepath

    directory, filename = os.path.split(filepath)
    body, extension = os.path.splitext(filename)

    # Split an existing "(n)" suffix off the stem once, instead of
    # re-matching the regenerated name on every iteration.
    match = re.search(PATTERN, body)
    if match:
        stem = match[1]
        counter = int(match[2]) + 1
    else:
        stem = body
        counter = 1

    while True:
        newpath = os.path.join(directory, f'{stem}({counter}){extension}')
        if not os.path.exists(newpath):
            return newpath
        counter += 1