From 02d48cecccf251d8f961f0d6b3743fe774ccfcfb Mon Sep 17 00:00:00 2001
From: taizan-hokouto <55448286+taizan-hokuto@users.noreply.github.com>
Date: Sat, 5 Dec 2020 14:42:02 +0900
Subject: [PATCH] Fix chat fetching process: switch to POST requests and update response parsing

---
 pytchat/config/__init__.py                    |  6 +-
 pytchat/parser/live.py                        | 25 ++++----
 .../processors/compatible/renderer/base.py    | 17 +++---
 .../compatible/renderer/currency.py           | 12 ++--
 pytchat/tool/extract/asyncdl.py               | 54 ++++++++++--------
 pytchat/tool/extract/parser.py                | 14 +++--
 pytchat/tool/extract/worker.py                |  2 +-
 pytchat/util/__init__.py                      | 57 ++++++++++++++++++-
 8 files changed, 126 insertions(+), 61 deletions(-)

diff --git a/pytchat/config/__init__.py b/pytchat/config/__init__.py
index e362819..4f26a1e 100644
--- a/pytchat/config/__init__.py
+++ b/pytchat/config/__init__.py
@@ -1,9 +1,13 @@
 import logging  # noqa
 from . import mylogger
+from base64 import a85decode as dc
 headers = {
-    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36 Edg/86.0.622.63,gzip(gfe)',
 }
 
+_sml = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG%G#jBOQ!ICLqcS5tQB2;gCZ)?UdXC;f$GR3)MM2<(0>O7mh!,G@+K5?SO9T@okV").decode()
+_smr = dc(b"BQS?8F#ks-GB\\6`H#IhIF^eo7@rH3;H#IhIF^eor06T''Ch\\'(?XmbXF>%9<FC/iuG%G#jBOQ!iEb03+@<k(QAU-F)8U=fDGsP557S5F7CiNH7;)D3N77^*B6YU@\\?WfBr0emZX=#^").decode()
+
 
 def logger(module_name: str, loglevel=None):
     module_logger = mylogger.get_logger(module_name, loglevel=loglevel)
diff --git a/pytchat/parser/live.py b/pytchat/parser/live.py
index c3e10b3..567bb19 100644
--- a/pytchat/parser/live.py
+++ b/pytchat/parser/live.py
@@ -28,11 +28,12 @@ class Parser:
     def get_contents(self, jsn):
         if jsn is None:
             self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
-        if jsn['response']['responseContext'].get('errors'):
+        if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
             raise exceptions.ResponseContextError(
                 'The video_id would be wrong, or video is deleted or private.')
-        contents = jsn['response'].get('continuationContents')
-        return contents
+        contents = jsn.get('continuationContents')
+        visitor_data = jsn.get("responseContext", {}).get("visitorData")
+        return contents, visitor_data
 
     def parse(self, contents):
         """
@@ -85,6 +86,7 @@ class Parser:
             '''Broadcasting end or cannot fetch chat stream'''
             self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
         cont = contents['liveChatContinuation']['continuations'][0]
+
         if cont.get("liveChatReplayContinuationData"):
             # chat data exist.
             return None
@@ -97,23 +99,22 @@ class Parser:
     def _create_data(self, metadata, contents):
         actions = contents['liveChatContinuation'].get('actions')
         if self.is_replay:
-            interval = self._get_interval(actions)
-            metadata.setdefault("timeoutMs", interval)
+            last_offset_ms = self._get_lastoffset(actions)
+            metadata.setdefault("timeoutMs", 5000)
+            metadata.setdefault("last_offset_ms", last_offset_ms)
             """Archived chat has different structures than live chat,
             so make it the same format."""
             chatdata = [action["replayChatItemAction"]["actions"][0]
                         for action in actions]
         else:
-            metadata.setdefault('timeoutMs', 10000)
+            metadata.setdefault('timeoutMs', 5000)
             chatdata = actions
         return metadata, chatdata
 
-    def _get_interval(self, actions: list):
-        if actions is None:
-            return 0
-        start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
-        last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
-        return (last - start)
+    def _get_lastoffset(self, actions: list):
+        if not actions:  # None or empty list: there is no last offset to report
+            return 0
+        return int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
 
     def raise_exception(self, exception):
         if self.exception_holder is None:
diff --git a/pytchat/processors/compatible/renderer/base.py b/pytchat/processors/compatible/renderer/base.py
index 248a93a..fa10adf 100644
--- a/pytchat/processors/compatible/renderer/base.py
+++ b/pytchat/processors/compatible/renderer/base.py
@@ -1,5 +1,6 @@
-import datetime
-import pytz
+from datetime import datetime, timedelta, timezone
+
+TZ_UTC = timezone(timedelta(0), 'UTC')
 
 
 class BaseRenderer:
@@ -62,13 +63,13 @@ class BaseRenderer:
         if badges:
             for badge in badges:
                 author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"]
-                if author_type == '確認済み':
+                if author_type == 'VERIFIED' or author_type == '確認済み':
                     isVerified = True
-                if author_type == '所有者':
+                if author_type == 'OWNER' or author_type == '所有者':
                     isChatOwner = True
-                if 'メンバー' in author_type:
+                if 'メンバー' in author_type or 'MEMBER' in author_type:
                     isChatSponsor = True
-                if author_type == 'モデレーター':
+                if author_type == 'MODERATOR' or author_type == 'モデレーター':
                     isChatModerator = True
         return isVerified, isChatOwner, isChatSponsor, isChatModerator
 
@@ -76,6 +77,6 @@ class BaseRenderer:
         return self.renderer.get('id')
 
     def get_publishedat(self, timestamp):
-        dt = datetime.datetime.fromtimestamp(int(timestamp) / 1000000)
-        return dt.astimezone(pytz.utc).isoformat(
+        dt = datetime.fromtimestamp(int(timestamp) / 1000000)
+        return dt.astimezone(TZ_UTC).isoformat(
             timespec='milliseconds').replace('+00:00', 'Z')
diff --git a/pytchat/processors/compatible/renderer/currency.py b/pytchat/processors/compatible/renderer/currency.py
index 00d683c..30b2174 100644
--- a/pytchat/processors/compatible/renderer/currency.py
+++ b/pytchat/processors/compatible/renderer/currency.py
@@ -1,12 +1,12 @@
 '''
-YouTubeスーパーチャットで使用される通貨の記号とレート検索用の略号の
-対応表
+Mapping from the currency symbols used in YouTube Super Chat to their currency codes.
+
 Key：
-    YouTubeスーパーチャットで使用される通貨の記号
-    （アルファベットで終わる場合、0xA0(&npsp)が付く）
+    Currency symbol used in YouTube Super Chat.
+    If the symbol ends with a letter, it is followed by 0xA0 (&nbsp;).
 Value:
-    fxtext: 3文字の通貨略称
-    jptest: 日本語テキスト
+    fxtext: ISO 4217 currency code
+    jptext: Japanese text
 '''
 symbols = {
     "$": {"fxtext": "USD", "jptext": "米・ドル"},
diff --git a/pytchat/tool/extract/asyncdl.py b/pytchat/tool/extract/asyncdl.py
index a4c52e8..eea2499 100644
--- a/pytchat/tool/extract/asyncdl.py
+++ b/pytchat/tool/extract/asyncdl.py
@@ -1,6 +1,8 @@
 import asyncio
 import httpx
 import socket
+from concurrent.futures import CancelledError
+from json import JSONDecodeError
 from . import parser
 from . block import Block
 from . worker import ExtractWorker
@@ -8,18 +10,17 @@ from . patch import Patch
 from ... import config
 from ... paramgen import arcparam
 from ... exceptions import UnknownConnectionError
-from concurrent.futures import CancelledError
-from json import JSONDecodeError
-from urllib.parse import quote
+from ... util import get_param
 
 
 headers = config.headers
-REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
-             "get_live_chat_replay?continuation="
+smr = config._smr
+
 MAX_RETRY_COUNT = 3
 
 # Set to avoid duplicate parameters
-param_set = set()
+aquired_params = set()
+dat = ''
 
 
 def _split(start, end, count, min_interval_sec=120):
@@ -55,28 +56,30 @@ def _split(start, end, count, min_interval_sec=120):
 
 
 def ready_blocks(video_id, duration, div, callback):
-    param_set.clear()
+    aquired_params.clear()
     if div <= 0:
         raise ValueError
 
     async def _get_blocks(video_id, duration, div, callback):
-        async with httpx.AsyncClient(http2=True) as session:
+        async with httpx.AsyncClient(http2=True, headers=headers) as session:
             tasks = [_create_block(session, video_id, seektime, callback)
                      for seektime in _split(-1, duration, div)]
             return await asyncio.gather(*tasks)
 
     async def _create_block(session, video_id, seektime, callback):
         continuation = arcparam.getparam(video_id, seektime=seektime)
-        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
         err = None
+        last_offset = 0
+        global dat
         for _ in range(MAX_RETRY_COUNT):
             try:
-                if continuation in param_set:
+                if continuation in aquired_params:
                     next_continuation, actions = None, []
                     break
-                param_set.add(continuation)
-                resp = await session.get(url, headers=headers, timeout=10)
-                next_continuation, actions = parser.parse(resp.json())
+                aquired_params.add(continuation)
+                param = get_param(continuation, replay=True, offsetms=seektime * 1000, dat=dat)
+                resp = await session.post(smr, json=param, timeout=10)
+                next_continuation, actions, last_offset, dat = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)
@@ -88,15 +91,14 @@ def ready_blocks(video_id, duration, div, callback):
             raise UnknownConnectionError("Abort:" + str(err))
 
         if actions:
-            first = parser.get_offset(actions[0])
-            last = parser.get_offset(actions[-1])
+            first_offset = parser.get_offset(actions[0])
             if callback:
-                callback(actions, last - first)
+                callback(actions, last_offset - first_offset)
             return Block(
                 continuation=next_continuation,
                 chat_data=actions,
-                first=first,
-                last=last
+                first=first_offset,
+                last=last_offset
             )
 
     """
@@ -122,17 +124,19 @@ def fetch_patch(callback, blocks, video_id):
             tasks = [worker.run(session) for worker in workers]
             return await asyncio.gather(*tasks)
 
-    async def _fetch(continuation, session) -> Patch:
-        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
+    async def _fetch(continuation, last_offset, session=None) -> Patch:
+        global dat
         err = None
         for _ in range(MAX_RETRY_COUNT):
             try:
-                if continuation in param_set:
+                if continuation in aquired_params:
                     continuation, actions = None, []
                     break
-                param_set.add(continuation)
-                resp = await session.get(url, headers=config.headers)
-                continuation, actions = parser.parse(resp.json())
+                aquired_params.add(continuation)
+                params = get_param(continuation, replay=True, offsetms=last_offset, dat=dat)
+                # POST the payload built above as the JSON body of the request to `smr`.
+                resp = await session.post(smr, json=params)
+                continuation, actions, last_offset, dat = parser.parse(resp.json())
                 break
             except JSONDecodeError:
                 await asyncio.sleep(3)
@@ -147,7 +151,7 @@ def fetch_patch(callback, blocks, video_id):
             raise UnknownConnectionError("Abort:" + str(err))
 
         if actions:
-            last = parser.get_offset(actions[-1])
+            last = last_offset
             first = parser.get_offset(actions[0])
             if callback:
                 callback(actions, last - first)
diff --git a/pytchat/tool/extract/parser.py b/pytchat/tool/extract/parser.py
index 2866af2..d9b2cc8 100644
--- a/pytchat/tool/extract/parser.py
+++ b/pytchat/tool/extract/parser.py
@@ -19,10 +19,10 @@ def parse(jsn):
     """
     if jsn is None:
         raise ValueError("parameter JSON is None")
-    if jsn['response']['responseContext'].get('errors'):
+    if jsn.get("error") or jsn.get("responseContext", {}).get("errors"):
         raise exceptions.ResponseContextError(
             'video_id is invalid or private/deleted.')
-    contents = jsn['response'].get('continuationContents')
+    contents = jsn.get('continuationContents')
     if contents is None:
         raise exceptions.NoContents('No chat data.')
 
@@ -31,13 +31,15 @@ def parse(jsn):
         raise exceptions.NoContinuation('No Continuation')
     metadata = cont.get('liveChatReplayContinuationData')
     if metadata:
+        visitor_data = jsn.get("responseContext", {}).get("visitorData", '')
         continuation = metadata.get("continuation")
-        actions = contents['liveChatContinuation'].get('actions')
-        return continuation, actions
-    return None, []
+        actions: list = contents['liveChatContinuation'].get('actions')
+        last_offset: int = get_offset(actions[-1]) if actions else 0
+        return continuation, actions, last_offset, visitor_data
+    return None, [], 0, ''
 
 
-def get_offset(item):
+def get_offset(item) -> int:
     return int(item['replayChatItemAction']["videoOffsetTimeMsec"])
 
 
diff --git a/pytchat/tool/extract/worker.py b/pytchat/tool/extract/worker.py
index 261de10..5216451 100644
--- a/pytchat/tool/extract/worker.py
+++ b/pytchat/tool/extract/worker.py
@@ -38,7 +38,7 @@ class ExtractWorker:
     async def run(self, session):
         while self.block.continuation:
             patch = await self.fetch(
-                self.block.continuation, session)
+                self.block.continuation, self.block.last, session)
             if patch.continuation is None:
                 """TODO : make the worker assigned to the last block
                 to work more than twice as possible.
diff --git a/pytchat/util/__init__.py b/pytchat/util/__init__.py
index acb984b..66cdd96 100644
--- a/pytchat/util/__init__.py
+++ b/pytchat/util/__init__.py
@@ -4,9 +4,18 @@ import json
 import os
 import re
 from .. import config
+from .. exceptions import InvalidVideoIdException
 
 PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
 
+PATTERN_YTURL = re.compile(r"((?<=(v|V)/)|(?<=be/)|(?<=(\?|\&)v=)|(?<=embed/))([\w-]+)")
+
+YT_VIDEO_ID_LENGTH = 11
+
+CLIENT_VERSION = ''.join(("2.", (datetime.datetime.today() - datetime.timedelta(days=1)).strftime("%Y%m%d"), ".01.00"))
+
+UA = config.headers["user-agent"]
+
 
 def extract(url):
     _session = httpx.Client(http2=True)
@@ -17,8 +26,9 @@ def extract(url):
 
 
 def save(data, filename, extention) -> str:
-    save_filename = filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
-    with open(save_filename ,mode='w', encoding='utf-8') as f:
+    save_filename = filename + "_" + \
+        (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
+    with open(save_filename, mode='w', encoding='utf-8') as f:
         f.writelines(data)
     return save_filename
 
@@ -39,3 +49,46 @@ def checkpath(filepath):
             body = f'{body}({str(counter)})'
         newpath = os.path.join(os.path.dirname(filepath), body + extention)
     return newpath
+
+
+def get_param(continuation, replay=False, offsetms: int = 0, dat=''):
+    """Build the JSON-serializable body sent with a continuation request.
+
+    Negative offsets are clamped to 0; the player offset is attached as a
+    decimal string only when `replay` is truthy.
+    """
+    offsetms = 0 if offsetms < 0 else offsetms
+    # Client context included in every request body.
+    client = {
+        "visitorData": dat,
+        "userAgent": UA,
+        "clientName": "WEB",
+        "clientVersion": CLIENT_VERSION,
+    }
+    payload = {"context": {"client": client}, "continuation": continuation}
+    # Replay requests additionally carry the player offset.
+    if replay:
+        payload["currentPlayerState"] = {"playerOffsetMs": str(int(offsetms))}
+    return payload
+
+
+def extract_video_id(url_or_id: str) -> str:
+    """Return the video id (YT_VIDEO_ID_LENGTH chars) from a YouTube URL or a bare id."""
+    # Check the type first: the bracket test below needs a str operand.
+    if not isinstance(url_or_id, str):
+        raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
+    if '[' in url_or_id:
+        url_or_id = url_or_id.replace('[', '').replace(']', '')
+    if len(url_or_id) == YT_VIDEO_ID_LENGTH:
+        return url_or_id
+    match = re.search(PATTERN_YTURL, url_or_id)
+    if match is None:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+    try:
+        ret = match.group(4)
+    except IndexError:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+    # Group 4 is the id candidate; it must have exactly the expected length.
+    if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
+        raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
+    return ret