Merge branch 'hotfix/pattern'
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.2.5'
|
__version__ = '0.2.6'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
|
|||||||
@@ -57,7 +57,10 @@ def main():
|
|||||||
try:
|
try:
|
||||||
video_id = extract_video_id(video_id)
|
video_id = extract_video_id(video_id)
|
||||||
if os.path.exists(Arguments().output):
|
if os.path.exists(Arguments().output):
|
||||||
path = Path(Arguments().output + video_id + '.html')
|
if Arguments().output[-1] != "/" or Arguments().output[-1] != "\\":
|
||||||
|
Arguments().output = '/'.join([Arguments().output, os.path.sep])
|
||||||
|
path = util.checkpath(Path.resolve(Path(Arguments().output + video_id + '.html')))
|
||||||
|
print(path)
|
||||||
else:
|
else:
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
err = None
|
err = None
|
||||||
@@ -80,7 +83,7 @@ def main():
|
|||||||
f" channel: {info.get_channel_name()}\n"
|
f" channel: {info.get_channel_name()}\n"
|
||||||
f" title: {info.get_title()}")
|
f" title: {info.get_title()}")
|
||||||
|
|
||||||
print(f" output path: {path.resolve()}")
|
print(f" output path: {path}")
|
||||||
duration = info.get_duration()
|
duration = info.get_duration()
|
||||||
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
pbar = ProgressBar(total=(duration * 1000), status="Extracting")
|
||||||
ex = Extractor(video_id,
|
ex = Extractor(video_id,
|
||||||
|
|||||||
@@ -116,11 +116,12 @@ class HTMLArchiver(ChatProcessor):
|
|||||||
|
|
||||||
def _encode_img(self, url):
|
def _encode_img(self, url):
|
||||||
err = None
|
err = None
|
||||||
for _ in range(3):
|
for _ in range(5):
|
||||||
try:
|
try:
|
||||||
resp = httpx.get(url)
|
resp = httpx.get(url, timeout=30)
|
||||||
break
|
break
|
||||||
except (NetworkError, ReadTimeout) as e:
|
except (NetworkError, ReadTimeout) as e:
|
||||||
|
print("Network Error. retrying...")
|
||||||
err = e
|
err = e
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ from ..util.extract_video_id import extract_video_id
|
|||||||
|
|
||||||
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
|
|
||||||
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
|
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
|
||||||
|
|
||||||
item_channel_id = [
|
item_channel_id = [
|
||||||
"videoDetails",
|
"videoDetails",
|
||||||
@@ -83,8 +83,16 @@ class VideoInfo:
|
|||||||
|
|
||||||
def __init__(self, video_id):
|
def __init__(self, video_id):
|
||||||
self.video_id = extract_video_id(video_id)
|
self.video_id = extract_video_id(video_id)
|
||||||
text = self._get_page_text(self.video_id)
|
for _ in range(3):
|
||||||
self._parse(text)
|
try:
|
||||||
|
text = self._get_page_text(self.video_id)
|
||||||
|
self._parse(text)
|
||||||
|
break
|
||||||
|
except PatternUnmatchError:
|
||||||
|
time.sleep(2)
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise PatternUnmatchError("Pattern Unmatch")
|
||||||
|
|
||||||
def _get_page_text(self, video_id):
|
def _get_page_text(self, video_id):
|
||||||
url = f"https://www.youtube.com/embed/{video_id}"
|
url = f"https://www.youtube.com/embed/{video_id}"
|
||||||
@@ -105,7 +113,7 @@ class VideoInfo:
|
|||||||
def _parse(self, text):
|
def _parse(self, text):
|
||||||
result = re.search(pattern, text)
|
result = re.search(pattern, text)
|
||||||
if result is None:
|
if result is None:
|
||||||
raise PatternUnmatchError(text)
|
raise PatternUnmatchError()
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
res = decoder.raw_decode(result.group(1)[:-1])[0]
|
||||||
response = self._get_item(res, item_response)
|
response = self._get_item(res, item_response)
|
||||||
|
|||||||
@@ -1,8 +1,12 @@
|
|||||||
|
import datetime
|
||||||
import httpx
|
import httpx
|
||||||
import json
|
import json
|
||||||
import datetime
|
import os
|
||||||
|
import re
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
|
||||||
|
|
||||||
def extract(url):
|
def extract(url):
|
||||||
_session = httpx.Client(http2=True)
|
_session = httpx.Client(http2=True)
|
||||||
@@ -16,3 +20,21 @@ def save(data, filename, extention):
|
|||||||
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
|
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
|
||||||
mode='w', encoding='utf-8') as f:
|
mode='w', encoding='utf-8') as f:
|
||||||
f.writelines(data)
|
f.writelines(data)
|
||||||
|
|
||||||
|
|
||||||
|
def checkpath(filepath):
    """Return a path that does not collide with an existing filesystem entry.

    When nothing exists at ``filepath`` the argument itself is returned
    unchanged. Otherwise the file stem is given a parenthesised number:
    ``(1)`` is appended when the stem does not already end in ``(N)``, and
    an existing trailing ``(N)`` is replaced by ``(N+1)``. This repeats
    until the candidate does not exist; that candidate is returned as the
    string produced by ``os.path.join``.

    Relies on the module-level ``PATTERN`` regex to detect a trailing
    ``(N)`` in the stem.
    """
    stem, suffix = os.path.splitext(os.path.basename(filepath))
    candidate = filepath
    while os.path.exists(candidate):
        numbered = re.search(PATTERN, stem)
        if numbered is None:
            # First collision on an un-numbered stem: start counting at 1.
            stem = f'{stem}(1)'
        else:
            # Stem already ends in "(N)": bump the number in place.
            stem = f'{numbered[1]}({int(numbered[2]) + 1})'
        candidate = os.path.join(os.path.dirname(filepath), stem + suffix)
    return candidate
|
||||||
|
|||||||
Reference in New Issue
Block a user