import asyncio
import json
import traceback
from urllib.parse import quote

import aiohttp

from . import parser
from .. import config
from .. import util
from ..paramgen import arcparam

logger = config.logger(__name__)
headers = config.headers
REPLAY_URL = ("https://www.youtube.com/live_chat_replay/"
              "get_live_chat_replay?continuation=")


class Block:
    def __init__(self, pos=0, init_offset=0, last_offset=0,
                 continuation='', chat_data=None):
        self.pos = pos
        self.init_offset = init_offset
        self.last_offset = last_offset
        self.stop_offset = 0
        self.continuation = continuation
        # Avoid a shared mutable default: each block gets its own list.
        self.chat_data = chat_data if chat_data is not None else []


def _debug_save(_pbar_pos, prefix, init_offset_ms, last_offset_ms, dics):
    init = '{:0>8}'.format(str(init_offset_ms))
    last = '{:0>8}'.format(str(last_offset_ms))
    chat_data = dics["response"]["continuationContents"][
        "liveChatContinuation"]["actions"]
    filename = f"[{_pbar_pos}]-{prefix}-from_{init}_to_{last}.data"
    with open(filename, mode='w', encoding='utf-8') as f:
        # Each action is a dict; serialize one JSON object per line.
        f.writelines(json.dumps(action) + '\n' for action in chat_data)


def dump(o):
    for key, value in o.__dict__.items():
        if key != "chat_data":
            print(key, ':', value)


def dumpt(blocks, mes=None):
    print(f"{'-' * 40}\n{mes}")
    for b in blocks:
        print(f"pos:{b.pos:>2} |init:{b.init_offset: >12,} "
              f"|last:{b.last_offset: >12,} |stop:{b.stop_offset:>12,}")


def _divide_(start, end, count):
    min_interval = 120
    if (not isinstance(start, int) or not isinstance(end, int)
            or not isinstance(count, int)):
        raise ValueError("start/end/count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")
    if (end - start) / count < min_interval:
        count = int((end - start) / min_interval)
        if count == 0:
            count = 1
    interval = (end - start) / count
    if count == 1:
        return [start]
    return sorted(set(int(start + interval * j) for j in range(count)))


def ready_blocks(video_id: str, div: int, duration: int):
    if div <= 0:
        raise ValueError("div must be equal to or greater than 1.")

    def _divide(start, end, count):
        if (not isinstance(start, int) or not isinstance(end, int)
                or not isinstance(count, int)):
            raise ValueError("start/end/count must be int")
        if start > end:
            raise ValueError("end must be equal to or greater than start.")
        if count < 1:
            raise ValueError("count must be equal to or greater than 1.")
        interval = (end - start) / count
        if interval < 120:
            # Clamp to a 120-second minimum interval and recompute the count.
            interval = 120
            count = int((end - start) / interval) + 1
        if count == 1:
            return [start]
        return sorted(set(int(start + interval * j) for j in range(count)))

    async def _get_blocks(duration, div):
        async with aiohttp.ClientSession() as session:
            futures = [_create_block(session, pos, seektime)
                       for pos, seektime
                       in enumerate(_divide(-1, duration, div))]
            return await asyncio.gather(*futures, return_exceptions=True)

    async def _create_block(session, pos, seektime):
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        async with session.get(url, headers=headers) as resp:
            text = await resp.text()
        # util.save(text, f"v:/~~/pre_{pos}_", ".json")
        next_continuation, actions = parser.parse(json.loads(text))
        if not actions:
            # Defensive: no chat actions at this seek position.
            return Block(pos=pos, continuation=next_continuation)
        return Block(
            pos=pos,
            continuation=next_continuation,
            chat_data=actions,
            init_offset=parser.get_offset(actions[0]),
            last_offset=parser.get_offset(actions[-1]),
        )

    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(_get_blocks(duration, div))
    return blocks
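# A worked example of the nested _divide helper above (values are
# illustrative, not from the source): for a one-hour archive split five ways,
# _divide(-1, 3600, 5) computes interval = 3601 / 5 = 720.2 seconds, which is
# above the 120-second floor, so the count is kept and the seek points are
# [-1, 719, 1439, 2159, 2879]; each point seeds one concurrently fetched Block.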
def remove_duplicate_head(blocks):
    def is_same_offset(index):
        return blocks[index].init_offset == blocks[index + 1].init_offset

    def is_same_id(index):
        id_0 = parser.get_id(blocks[index].chat_data[0])
        id_1 = parser.get_id(blocks[index + 1].chat_data[0])
        return id_0 == id_1

    def is_same_type(index):
        type_0 = parser.get_type(blocks[index].chat_data[0])
        type_1 = parser.get_type(blocks[index + 1].chat_data[0])
        return type_0 == type_1

    # Keep a block only if it has data and does not start with the same
    # action (offset, id, and type) as the block after it.
    ret = [blocks[i] for i in range(len(blocks) - 1)
           if (len(blocks[i].chat_data) > 0
               and not (is_same_offset(i) and is_same_id(i)
                        and is_same_type(i)))]
    ret.append(blocks[-1])
    return ret


def remove_overwrap(blocks):
    def is_overwrap(a, b):
        logger.debug(
            f"comparing({a}, {b})....overwrap "
            f"({blocks[a].last_offset > blocks[b].init_offset})")
        return blocks[a].last_offset > blocks[b].init_offset

    ret = [blocks[0]]
    a = 0
    b = 1
    jmp = False
    while a < len(blocks) - 2:
        # Advance b past every block that block a already covers.
        while is_overwrap(a, b):
            b += 1
            logger.debug("forward")
            if b == len(blocks) - 2:
                jmp = True
                break
        if jmp:
            break
        if b - a == 1:
            logger.debug(f"next ret.append(blocks[{b}])")
            ret.append(blocks[b])
            a = b
            b += 1
        else:
            logger.debug(f"apart ret.append(blocks[{b - 1}])")
            ret.append(blocks[b - 1])
            a = b - 1
            b = a + 1
    ret.append(blocks[-1])
    return ret


def remove_duplicate_tail(blocks):
    def is_same_offset(index):
        return blocks[index - 1].last_offset == blocks[index].last_offset

    def is_same_id(index):
        id_0 = parser.get_id(blocks[index - 1].chat_data[-1])
        id_1 = parser.get_id(blocks[index].chat_data[-1])
        return id_0 == id_1

    def is_same_type(index):
        type_0 = parser.get_type(blocks[index - 1].chat_data[-1])
        type_1 = parser.get_type(blocks[index].chat_data[-1])
        return type_0 == type_1

    # Always keep the first block; a later block is a duplicate tail only
    # when it ends with the same action (offset, id, and type) as its
    # predecessor.
    ret = [blocks[0]]
    ret.extend(blocks[i] for i in range(1, len(blocks))
               if not (is_same_offset(i) and is_same_id(i)
                       and is_same_type(i)))
    return ret


def set_stop_offset(blocks):
    # Each block stops where the next block starts; the final block (-1)
    # runs to the end of the stream.
    for i in range(len(blocks) - 1):
        blocks[i].stop_offset = blocks[i + 1].init_offset
    blocks[-1].stop_offset = -1
    return blocks


def download_each_block(blocks):
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(_dl_block(blocks))


async def _dl_block(blocks):
    async with aiohttp.ClientSession() as session:
        futures = [_dl_chunk(session, block) for block in blocks]
        return await asyncio.gather(*futures, return_exceptions=True)


async def _dl_chunk(session, block: Block):
    # The initial fetch already covered this block's range.
    if block.stop_offset != -1 and block.last_offset > block.stop_offset:
        return

    def get_last_offset(actions):
        return parser.get_offset(actions[-1])

    continuation = block.continuation
    while continuation:
        logger.debug(block.pos)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        async with session.get(url, headers=headers) as resp:
            text = await resp.text()
        continuation, actions = parser.parse(json.loads(text))
        if actions:
            block.chat_data.extend(actions)
            last_offset = get_last_offset(actions)
            block.last_offset = last_offset
            # Stop following continuations once this chunk reaches the
            # start of the next block.
            if block.stop_offset != -1 and last_offset > block.stop_offset:
                break


def combine(blocks):
    line = ''
    try:
        if len(blocks[0].chat_data) > 0:
            lastline = blocks[0].chat_data[-1]
            lastline_offset = parser.get_offset(lastline)
        else:
            return None
        for i in range(1, len(blocks)):
            f = blocks[i].chat_data
            if len(f) == 0:
                logger.error(f'zero size piece.:{str(i)}')
                continue
            for row in range(len(f)):
                line = f[row]
                # Skip rows up to and including the last line already
                # combined, then append the rest of this piece.
                if parser.get_offset(line) > lastline_offset:
                    blocks[0].chat_data.extend(f[row:])
                    break
                if line == 'error':
                    logger.error(f'Error file was saved.: piece:{str(i)}')
                    return ['error']
            else:
                # No row continued past the previous piece, so the pieces
                # do not share a common line.
                logger.error(f'Missing common line.: piece:{str(i - 1)}->'
                             f'{str(i)} lastline_id={lastline_offset}')
                return ['combination failed']
            lastline_offset = parser.get_offset(f[-1])
        return blocks[0].chat_data
    except Exception as e:
        logger.error(f"{type(e)} {str(e)} {line}")
        traceback.print_exc()
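# A minimal sketch (hypothetical offsets, in milliseconds) of how
# set_stop_offset bounds each chunk before download_each_block runs:
#
#   b0 = Block(pos=0, init_offset=0)
#   b1 = Block(pos=1, init_offset=600_000)
#   set_stop_offset([b0, b1])
#   assert b0.stop_offset == 600_000  # b0 stops once it reaches b1's start
#   assert b1.stop_offset == -1       # the final block runs to the end
#
# _dl_chunk then keeps following continuations until a fetched batch's last
# offset passes stop_offset, so adjacent blocks meet with only a small
# overlap that combine() later skips over.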
def download(video_id, duration, div):
    blocks = ready_blocks(video_id=video_id, duration=duration, div=div)
    dumpt(blocks, "ready_blocks")
    selected = remove_duplicate_head(blocks)
    dumpt(selected, "removed duplicate_head")
    set_stop_offset(selected)
    dumpt(selected, "set stop_offset")
    removed = remove_overwrap(selected)
    dumpt(removed, "removed overwrap")
    download_each_block(removed)
    dumpt(removed, "downloaded each_block")
    return combine(removed)
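# A minimal usage sketch (the video id, duration, and div values are
# hypothetical): duration is the length of the archived stream in seconds
# and div the number of blocks fetched in parallel. Because this module uses
# relative imports, it is meant to be called from within its package:
#
#   chat_data = download(video_id="XXXXXXXXXXX", duration=3600, div=10)
#   if chat_data:
#       print(f"combined {len(chat_data)} chat actions")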