Merge tag 'network' into develop

v0.2.5
This commit is contained in:
taizan-hokuto
2020-09-14 00:40:40 +09:00
6 changed files with 57 additions and 12 deletions

View File

@@ -2,7 +2,7 @@
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.2.4' __version__ = '0.2.5'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -5,9 +5,10 @@ import signal
import time import time
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
from .arguments import Arguments from .arguments import Arguments
from .progressbar import ProgressBar from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
from .. processors.html_archiver import HTMLArchiver from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo from .. tool.videoinfo import VideoInfo
@@ -50,6 +51,9 @@ def main():
for counter, video_id in enumerate(Arguments().video_ids): for counter, video_id in enumerate(Arguments().video_ids):
if '[' in video_id: if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '') video_id = video_id.replace('[', '').replace(']', '')
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try: try:
video_id = extract_video_id(video_id) video_id = extract_video_id(video_id)
if os.path.exists(Arguments().output): if os.path.exists(Arguments().output):
@@ -71,8 +75,6 @@ def main():
util.save(err.doc, "ERR", ".dat") util.save(err.doc, "ERR", ".dat")
continue continue
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
print(f"\n" print(f"\n"
f" video_id: {video_id}\n" f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n" f" channel: {info.get_channel_name()}\n"
@@ -112,6 +114,12 @@ def main():
print("JSONDecodeError.:{}".format(video_id)) print("JSONDecodeError.:{}".format(video_id))
if Arguments().save_error_data: if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat") util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
except PatternUnmatchError:
print(f"PatternUnmatchError [{video_id}]. ")
except Exception as e:
print(type(e), str(e))
return return

View File

@@ -1,9 +1,12 @@
import httpx
import os import os
import re import re
import httpx import time
from base64 import standard_b64encode from base64 import standard_b64encode
from httpx import NetworkError, ReadTimeout
from .chat_processor import ChatProcessor from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor from .default.processor import DefaultProcessor
from ..exceptions import UnknownConnectionError
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -112,7 +115,17 @@ class HTMLArchiver(ChatProcessor):
for item in message_items) for item in message_items)
def _encode_img(self, url): def _encode_img(self, url):
resp = httpx.get(url) err = None
for _ in range(3):
try:
resp = httpx.get(url)
break
except (NetworkError, ReadTimeout) as e:
err = e
time.sleep(3)
else:
raise UnknownConnectionError(str(err))
return standard_b64encode(resp.content).decode() return standard_b64encode(resp.content).decode()
def _set_emoji_table(self, item: dict): def _set_emoji_table(self, item: dict):

View File

@@ -8,9 +8,11 @@ from ... import config
from ... paramgen import arcparam from ... paramgen import arcparam
from ... exceptions import UnknownConnectionError from ... exceptions import UnknownConnectionError
from concurrent.futures import CancelledError from concurrent.futures import CancelledError
from httpx import NetworkError, ReadTimeout
from json import JSONDecodeError from json import JSONDecodeError
from urllib.parse import quote from urllib.parse import quote
headers = config.headers headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \ REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation=" "get_live_chat_replay?continuation="
@@ -66,6 +68,7 @@ def ready_blocks(video_id, duration, div, callback):
async def _create_block(session, video_id, seektime, callback): async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime=seektime) continuation = arcparam.getparam(video_id, seektime=seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
err = None
for _ in range(MAX_RETRY_COUNT): for _ in range(MAX_RETRY_COUNT):
try: try:
if continuation in param_set: if continuation in param_set:
@@ -77,9 +80,12 @@ def ready_blocks(video_id, duration, div, callback):
break break
except JSONDecodeError: except JSONDecodeError:
await asyncio.sleep(3) await asyncio.sleep(3)
except (NetworkError, ReadTimeout) as e:
err = e
await asyncio.sleep(3)
else: else:
cancel() cancel()
raise UnknownConnectionError("Abort: Unknown connection error.") raise UnknownConnectionError("Abort:" + str(err))
if actions: if actions:
first = parser.get_offset(actions[0]) first = parser.get_offset(actions[0])
@@ -118,6 +124,7 @@ def fetch_patch(callback, blocks, video_id):
async def _fetch(continuation, session) -> Patch: async def _fetch(continuation, session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1" url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
err = None
for _ in range(MAX_RETRY_COUNT): for _ in range(MAX_RETRY_COUNT):
try: try:
if continuation in param_set: if continuation in param_set:
@@ -129,9 +136,12 @@ def fetch_patch(callback, blocks, video_id):
break break
except JSONDecodeError: except JSONDecodeError:
await asyncio.sleep(3) await asyncio.sleep(3)
except (NetworkError, ReadTimeout) as e:
err = e
await asyncio.sleep(3)
else: else:
cancel() cancel()
raise UnknownConnectionError("Abort: Unknown connection error.") raise UnknownConnectionError("Abort:" + str(err))
if actions: if actions:
last = parser.get_offset(actions[-1]) last = parser.get_offset(actions[-1])

View File

@@ -93,4 +93,5 @@ class Extractor:
return ret return ret
def cancel(self): def cancel(self):
print("cancel")
asyncdl.cancel() asyncdl.cancel()

View File

@@ -1,10 +1,13 @@
import httpx
import json import json
import re import re
import httpx import time
from httpx import ConnectError, NetworkError
from .. import config from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id from ..util.extract_video_id import extract_video_id
headers = config.headers headers = config.headers
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})") pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
@@ -85,8 +88,18 @@ class VideoInfo:
def _get_page_text(self, video_id): def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}" url = f"https://www.youtube.com/embed/{video_id}"
resp = httpx.get(url, headers=headers) err = None
resp.raise_for_status() for _ in range(3):
try:
resp = httpx.get(url, headers=headers)
resp.raise_for_status()
break
except (ConnectError, NetworkError) as e:
err = e
time.sleep(3)
else:
raise UnknownConnectionError(str(err))
return resp.text return resp.text
def _parse(self, text): def _parse(self, text):