# pytchat-fork/pytchat/util/__init__.py

import datetime
import httpx
import json
import os
import re
from urllib.parse import quote
from .. import config
from .. exceptions import InvalidVideoIdException

# Matches a name that ends in a parenthesised number, e.g. "name(3)":
# group 1 is the text before the parentheses, group 2 the digits.
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")

# Finds a run of word characters / hyphens that directly follows "v/", "V/",
# "be/", "?v=", "&v=" or "embed/".  Groups 1-3 belong to the lookbehind
# alternatives; the captured run itself is group 4.
PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")

# Matches \"channelId\":\"<24 characters>\" where every double quote is
# preceded by a literal backslash; group 1 is the 24-character value.
PATTERN_CHANNEL = re.compile(r"\\\"channelId\\\":\\\"(.{24})\\\"")

# Same shape with plain (unescaped) quotes: "channelId":"<24 characters>";
# group 1 is the 24-character value.
PATTERN_M_CHANNEL = re.compile(r"\"channelId\":\"(.{24})\"")

# Length a string must have to be accepted as a video id.
YT_VIDEO_ID_LENGTH = 11

# "2." + yesterday's local date as YYYYMMDD + ".01.00"; computed once, when
# this module is imported.
CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))

# User-agent string taken from the shared request headers in ``config``.
UA = config.headers["user-agent"]


def extract(url):
    """Fetch *url* and dump its JSON response body to a timestamped file.

    The decoded response is written to ``<YYYY-MM-DD HH-MM-SS>test.json``
    in the current working directory, encoded as UTF-8 with non-ASCII
    characters left unescaped.

    Args:
        url: Address to request; ``config.headers`` is sent with it.

    Returns:
        None.

    Exceptions raised by the HTTP request, the JSON decoding or the file
    write are not handled here and propagate to the caller.
    """
    # Use the client as a context manager so its connections are released
    # even when the request or the JSON decoding fails.
    with httpx.Client(http2=True) as client:
        payload = client.get(url, headers=config.headers).json()

    # The body is decoded before the file is opened, so a non-JSON response
    # does not leave an empty dump file behind.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    with open(timestamp + 'test.json', mode='w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False)


def save(data, filename, extention) -> str:
    """Write *data* to a new timestamped file and return the file's name.

    The target name is ``<filename>_<YYYY-MM-DD HH-MM-SS><extention>``; the
    timestamp is the local time at the moment of the call.

    Args:
        data: Iterable of strings, handed to ``writelines`` unchanged (no
            line separators are added).
        filename: Leading part of the output name (may include a directory).
        extention: Suffix appended after the timestamp, e.g. ``".txt"``.

    Returns:
        The name of the file that was written.
    """
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_filename = "".join((filename, "_", timestamp, extention))
    with open(save_filename, mode='w', encoding='utf-8') as out:
        out.writelines(data)
    return save_filename


def checkpath(filepath):
    """Return *filepath*, or a numbered variant of it that does not exist yet.

    While the candidate path exists, the file stem gets a ``(n)`` suffix:
    a stem without one receives ``(1)``; a stem already ending in ``(n)``
    has that number replaced by ``n + 1``.  Directory and extension are
    kept unchanged.

    Args:
        filepath: Desired path of the file.

    Returns:
        A path for which ``os.path.exists`` was false when it was checked.
    """
    stem, suffix = os.path.splitext(os.path.basename(filepath))
    directory = os.path.dirname(filepath)
    candidate = filepath
    counter = 1
    while os.path.exists(candidate):
        numbered = PATTERN.search(stem)
        if numbered is None:
            # No "(n)" suffix yet: start numbering.
            stem = f'{stem}({counter})'
        else:
            # Already numbered: bump the existing number by one.
            counter = int(numbered.group(2)) + 1
            stem = f'{numbered.group(1)}({counter})'
        candidate = os.path.join(directory, stem + suffix)
    return candidate


def get_param(continuation, replay=False, offsetms: int = 0, dat=''):
    """Build the parameter dictionary for a continuation.

    Presumably this is the JSON body sent to the chat endpoint; confirm
    against the callers.

    Args:
        continuation: Value stored under the ``"continuation"`` key.
        replay: When truthy, a ``"currentPlayerState"`` entry carrying the
            player offset is added.
        offsetms: Player offset in milliseconds; negative values are
            treated as 0.  Only used when *replay* is truthy.
        dat: Value stored as ``context.client.visitorData``.

    Returns:
        A dict with ``"context"`` and ``"continuation"`` keys and, for
        replays, ``"currentPlayerState"``.
    """
    offsetms = 0 if offsetms < 0 else offsetms
    client_info = {
        "visitorData": dat,
        "userAgent": UA,
        "clientName": "WEB",
        "clientVersion": CLIENT_VERSION,
    }
    payload = {
        "context": {"client": client_info},
        "continuation": continuation,
    }
    if replay:
        # The offset is sent as a string holding an integer.
        payload["currentPlayerState"] = {"playerOffsetMs": str(int(offsetms))}
    return payload


def extract_video_id(url_or_id: str) -> str:
    """Return the video id contained in *url_or_id*.

    Square brackets are stripped from the input first.  If what remains is
    exactly ``YT_VIDEO_ID_LENGTH`` characters long it is returned as-is;
    otherwise the id is taken from the part of the URL captured by
    ``PATTERN_YTURL``.

    Args:
        url_or_id: A video id, or a URL that contains one.

    Returns:
        The extracted video id.

    Raises:
        TypeError: If *url_or_id* is not a ``str``.
        InvalidVideoIdException: If no id can be found in the input, or the
            extracted candidate does not have the expected length.
    """
    # Validate the type before any string operation; otherwise a non-str
    # argument fails inside the code below with an unrelated message and
    # this descriptive error is never reached.
    if not isinstance(url_or_id, str):
        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")

    # replace() is a no-op when the input contains no brackets.
    url_or_id = url_or_id.replace('[', '').replace(']', '')

    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
        return url_or_id

    match = re.search(PATTERN_YTURL, url_or_id)
    if match is None:
        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")

    # Group 4 is the id capture of PATTERN_YTURL; the group always exists in
    # that pattern, so group() cannot raise IndexError here.
    ret = match.group(4)
    if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
    return ret


def get_channelid(client, video_id):
    """Return the channel id found on the embed page of *video_id*.

    The embed page is searched with ``PATTERN_CHANNEL``; when nothing
    matches, the lookup is delegated to ``get_channelid_2nd``.

    Args:
        client: Object whose ``get(url, headers=...)`` returns a response
            exposing ``.text`` (presumably an ``httpx.Client``).
        video_id: Video id; it is URL-quoted before being put in the URL.

    Returns:
        The 24-character channel id captured by the pattern, or the result
        of the fallback lookup.

    Raises:
        InvalidVideoIdException: Propagated from the fallback when it
            cannot find a channel id either.
    """
    resp = client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
    match = re.search(PATTERN_CHANNEL, resp.text)
    if match is not None:
        return match.group(1)
    # Plain branch instead of raising/catching a synthetic IndexError, so an
    # error raised by the fallback carries no misleading exception context.
    return get_channelid_2nd(client, video_id)


def get_channelid_2nd(client, video_id):
    """Return the channel id found on the mobile watch page of *video_id*.

    Args:
        client: Object whose ``get(url, headers=...)`` returns a response
            exposing ``.text`` (presumably an ``httpx.Client``).
        video_id: Video id; it is URL-quoted before being put in the URL.

    Returns:
        The 24-character channel id captured by ``PATTERN_M_CHANNEL``.

    Raises:
        InvalidVideoIdException: If the page contains no channel id.
    """
    resp = client.get("https://m.youtube.com/watch?v={}".format(quote(video_id)), headers=config.m_headers)
    match = re.search(PATTERN_M_CHANNEL, resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_M_CHANNEL always defines group 1, so group(1) cannot raise
    # IndexError once a match exists.
    return match.group(1)


async def get_channelid_async(client, video_id):
    """Return the channel id found on the embed page of *video_id*.

    Coroutine counterpart of ``get_channelid``: the embed page is searched
    with ``PATTERN_CHANNEL``; when nothing matches, the lookup is delegated
    to ``get_channelid_async_2nd``.

    Args:
        client: Object whose awaitable ``get(url, headers=...)`` yields a
            response exposing ``.text`` (presumably an
            ``httpx.AsyncClient``).
        video_id: Video id; it is URL-quoted before being put in the URL.

    Returns:
        The 24-character channel id captured by the pattern, or the result
        of the fallback lookup.

    Raises:
        InvalidVideoIdException: Propagated from the fallback when it
            cannot find a channel id either.
    """
    resp = await client.get("https://www.youtube.com/embed/{}".format(quote(video_id)), headers=config.headers)
    match = re.search(PATTERN_CHANNEL, resp.text)
    if match is not None:
        return match.group(1)
    # Plain branch instead of raising/catching a synthetic IndexError, so an
    # error raised by the fallback carries no misleading exception context.
    return await get_channelid_async_2nd(client, video_id)

async def get_channelid_async_2nd(client, video_id):
    """Return the channel id found on the mobile watch page of *video_id*.

    Coroutine counterpart of ``get_channelid_2nd``.

    Args:
        client: Object whose awaitable ``get(url, headers=...)`` yields a
            response exposing ``.text`` (presumably an
            ``httpx.AsyncClient``).
        video_id: Video id; it is URL-quoted before being put in the URL.

    Returns:
        The 24-character channel id captured by ``PATTERN_M_CHANNEL``.

    Raises:
        InvalidVideoIdException: If the page contains no channel id.
    """
    resp = await client.get("https://m.youtube.com/watch?v={}".format(quote(video_id)), headers=config.m_headers)
    match = re.search(PATTERN_M_CHANNEL, resp.text)
    if match is None:
        raise InvalidVideoIdException(f"Cannot find channel id for video id:{video_id}. This video id seems to be invalid.")
    # PATTERN_M_CHANNEL always defines group 1, so group(1) cannot raise
    # IndexError once a match exists.
    return match.group(1)