Merge tag 'pattern' into develop

v0.2.6
This commit is contained in:
taizan_hokuto
2020-10-03 22:35:46 +09:00
5 changed files with 45 additions and 11 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.5'
__version__ = '0.2.6'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -57,7 +57,10 @@ def main():
try:
video_id = extract_video_id(video_id)
if os.path.exists(Arguments().output):
path = Path(Arguments().output + video_id + '.html')
if Arguments().output[-1] != "/" or Arguments().output[-1] != "\\":
Arguments().output = '/'.join([Arguments().output, os.path.sep])
path = util.checkpath(Path.resolve(Path(Arguments().output + video_id + '.html')))
print(path)
else:
raise FileNotFoundError
err = None
@@ -80,7 +83,7 @@ def main():
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
print(f" output path: {path.resolve()}")
print(f" output path: {path}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
ex = Extractor(video_id,

View File

@@ -116,11 +116,12 @@ class HTMLArchiver(ChatProcessor):
def _encode_img(self, url):
err = None
for _ in range(3):
for _ in range(5):
try:
resp = httpx.get(url)
resp = httpx.get(url, timeout=30)
break
except (NetworkError, ReadTimeout) as e:
print("Network Error. retrying...")
err = e
time.sleep(3)
else:

View File

@@ -10,7 +10,7 @@ from ..util.extract_video_id import extract_video_id
headers = config.headers
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
item_channel_id = [
"videoDetails",
@@ -83,8 +83,16 @@ class VideoInfo:
def __init__(self, video_id):
    """Fetch the page text for ``video_id`` and parse it.

    ``video_id`` is first normalised through ``extract_video_id``. The
    fetch-and-parse step is attempted up to three times, with a two-second
    pause between attempts, whenever parsing raises ``PatternUnmatchError``.

    Raises:
        PatternUnmatchError: If all three attempts fail to match.
    """
    self.video_id = extract_video_id(video_id)
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            text = self._get_page_text(self.video_id)
            self._parse(text)
            break
        except PatternUnmatchError:
            # Pause only when another attempt will follow; sleeping right
            # before giving up would just delay the error.
            if attempt < max_attempts:
                time.sleep(2)
    else:
        # The loop finished without a successful parse.
        raise PatternUnmatchError("Pattern Unmatch")
def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}"
@@ -105,7 +113,7 @@ class VideoInfo:
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError(text)
raise PatternUnmatchError()
decoder = json.JSONDecoder()
res = decoder.raw_decode(result.group(1)[:-1])[0]
response = self._get_item(res, item_response)

View File

@@ -1,8 +1,12 @@
import datetime
import httpx
import json
import datetime
import os
import re
from .. import config
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
def extract(url):
_session = httpx.Client(http2=True)
@@ -16,3 +20,21 @@ def save(data, filename, extention):
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
mode='w', encoding='utf-8') as f:
f.writelines(data)
def checkpath(filepath):
    """Return a path that does not collide with an existing file.

    If nothing exists at ``filepath`` it is returned unchanged. Otherwise a
    counter in parentheses is placed between the file name and its
    extension and incremented until an unused name is found, e.g.
    ``video.html`` -> ``video(1).html`` -> ``video(2).html``. A name that
    already ends in ``(N)`` continues counting from ``N + 1``.

    Args:
        filepath: Candidate path, as ``str`` or a ``pathlib`` path.

    Returns:
        ``filepath`` itself when it is free; otherwise the first free
        sibling path. A ``pathlib`` argument yields a ``pathlib`` result of
        the same class, a ``str`` argument yields a ``str``.

    Note:
        Nothing is created or reserved here; the returned name is only
        known to be unused at the moment of the check.
    """
    from pathlib import PurePath

    if not os.path.exists(filepath):
        return filepath

    directory, filename = os.path.split(filepath)
    stem, extension = os.path.splitext(filename)

    # Parse an existing "(N)" suffix once, then simply count upwards.
    match = re.search(PATTERN, stem)
    if match:
        stem = match[1]
        counter = int(match[2]) + 1
    else:
        counter = 1

    while True:
        candidate = os.path.join(directory, f"{stem}({counter}){extension}")
        if not os.path.exists(candidate):
            break
        counter += 1

    # Hand back the same kind of object the caller passed in, so the result
    # type does not depend on whether a collision happened.
    if isinstance(filepath, PurePath):
        return type(filepath)(candidate)
    return candidate