Compare commits
141 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c817b6476 | ||
|
|
18b88200a8 | ||
|
|
c95d70a232 | ||
|
|
7640586591 | ||
|
|
f7ec14e166 | ||
|
|
a4dacdb7d7 | ||
|
|
785a82b618 | ||
|
|
faf886eebd | ||
|
|
8a627414cb | ||
|
|
d14262cbcb | ||
|
|
da7c694dfb | ||
|
|
9aa35b9756 | ||
|
|
f0a1a509a0 | ||
|
|
5ebca605ac | ||
|
|
3826b32ab9 | ||
|
|
a46c82d3c0 | ||
|
|
206d052907 | ||
|
|
141d7a9299 | ||
|
|
04457eaa5c | ||
|
|
bd32c75833 | ||
|
|
84bae4ad2a | ||
|
|
d72608bf0a | ||
|
|
3243d69d7a | ||
|
|
6e1b735ebc | ||
|
|
c54481dad5 | ||
|
|
78604c84d4 | ||
|
|
21d93613a2 | ||
|
|
56bf721330 | ||
|
|
5f50598f79 | ||
|
|
5e8c438c6b | ||
|
|
23e47f6fb0 | ||
|
|
74dfe0a612 | ||
|
|
725af25d81 | ||
|
|
316fc5594a | ||
|
|
44dffc7650 | ||
|
|
102d8c48c4 | ||
|
|
f8822a053f | ||
|
|
9d624f771a | ||
|
|
778d4db28b | ||
|
|
36e0fd5c54 | ||
|
|
4252643273 | ||
|
|
c88fd8bc4e | ||
|
|
af3b6d4271 | ||
|
|
331e825c97 | ||
|
|
4019ad4b9d | ||
|
|
1074178afc | ||
|
|
55a58f532d | ||
|
|
b302454083 | ||
|
|
ff9e7de796 | ||
|
|
fe2047502a | ||
|
|
5480e3e9ed | ||
|
|
18c08f45ad | ||
|
|
a9831c6a27 | ||
|
|
60976b2584 | ||
|
|
92abf7499c | ||
|
|
4416e1a79c | ||
|
|
f7f9c1cda3 | ||
|
|
de35537be8 | ||
|
|
61d4e06470 | ||
|
|
3c95242ddf | ||
|
|
af4afb4636 | ||
|
|
05e1c908a5 | ||
|
|
e770d95fe8 | ||
|
|
eae485b914 | ||
|
|
d8c1c4491d | ||
|
|
3e941c2cf1 | ||
|
|
8b617551ad | ||
|
|
c4cf424702 | ||
|
|
6fdb3bf8cf | ||
|
|
b1292b4329 | ||
|
|
339d04ad75 | ||
|
|
abb7565e3a | ||
|
|
ee77807dbd | ||
|
|
2c598bc8f7 | ||
|
|
c7bfae9f2a | ||
|
|
eaa7bdc8b6 | ||
|
|
4a8e353098 | ||
|
|
24f08ecbdb | ||
|
|
e8510f1116 | ||
|
|
f1d8393971 | ||
|
|
04aedc82e8 | ||
|
|
228773295d | ||
|
|
59defc568c | ||
|
|
9de75788f2 | ||
|
|
76f0c0e658 | ||
|
|
0d8ecb778f | ||
|
|
a3eca8f05d | ||
|
|
bbf7a2906a | ||
|
|
1862b83eac | ||
|
|
053ff5291f | ||
|
|
4e47d4a262 | ||
|
|
436e8df4c9 | ||
|
|
5ab8cfe736 | ||
|
|
15b517e905 | ||
|
|
214a3d2be3 | ||
|
|
e968325d1f | ||
|
|
a56dc89477 | ||
|
|
38253e1d18 | ||
|
|
cc78551e90 | ||
|
|
6e37ef5d4f | ||
|
|
c126d5b825 | ||
|
|
a89503fe9e | ||
|
|
1d7678c954 | ||
|
|
dea98c33d7 | ||
|
|
5ba61db4f3 | ||
|
|
03b901d59c | ||
|
|
540f16c1a0 | ||
|
|
cc8bba8f63 | ||
|
|
22b3ec2994 | ||
|
|
9d494446e1 | ||
|
|
956c7e2640 | ||
|
|
03537c0a06 | ||
|
|
f7d1830226 | ||
|
|
76b126faf2 | ||
|
|
bbd01d6523 | ||
|
|
f8fa0e394e | ||
|
|
efdf07e3de | ||
|
|
2573cc18de | ||
|
|
1c5852421b | ||
|
|
970d111e1b | ||
|
|
1643dd1ad1 | ||
|
|
0272319fa6 | ||
|
|
fb0edef136 | ||
|
|
260a2b35a9 | ||
|
|
e03d39475e | ||
|
|
2462b8aca0 | ||
|
|
a1024c8734 | ||
|
|
6b3ca00d35 | ||
|
|
385634b709 | ||
|
|
c1a78a2743 | ||
|
|
7961801e0c | ||
|
|
5fe4e7af04 | ||
|
|
892dfb8a91 | ||
|
|
fddab22a1f | ||
|
|
7194948066 | ||
|
|
a836d92194 | ||
|
|
c408cb2713 | ||
|
|
c3d2238ead | ||
|
|
6c8d390fc7 | ||
|
|
ff1ee70d7e | ||
|
|
404623546e |
@@ -1,7 +1,5 @@
|
|||||||
include requirements.txt
|
include requirements.txt
|
||||||
include requirements_test.txt
|
include requirements_test.txt
|
||||||
prune testrun*.py
|
include README.md
|
||||||
prune log.txt
|
global-exclude tests/*
|
||||||
prune quote.txt
|
global-exclude pytchat/testrun*.py
|
||||||
prune .gitignore
|
|
||||||
prun tests
|
|
||||||
123
README.md
123
README.md
@@ -7,32 +7,52 @@ pytchat is a python library for fetching youtube live chat.
|
|||||||
pytchat is a python library for fetching youtube live chat
|
pytchat is a python library for fetching youtube live chat
|
||||||
without using youtube api, Selenium or BeautifulSoup.
|
without using youtube api, Selenium or BeautifulSoup.
|
||||||
|
|
||||||
|
pytchatはAPIを使わずにYouTubeチャットを取得するためのpythonライブラリです。
|
||||||
|
|
||||||
Other features:
|
Other features:
|
||||||
+ Customizable chat data processors including youtube api compatible one.
|
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor), including a YouTube API-compatible one.
|
||||||
+ Available on asyncio context.
|
+ Available on asyncio context.
|
||||||
+ Quick fetching of initial chat data by generating continuation params
|
+ Quick fetching of initial chat data by generating continuation params
|
||||||
instead of web scraping.
|
instead of web scraping.
|
||||||
|
|
||||||
For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki).
|
For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki). <br>
|
||||||
|
より詳細な解説は[wiki](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)を参照してください。
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
```python
|
```python
|
||||||
pip install pytchat
|
pip install pytchat
|
||||||
```
|
```
|
||||||
## Demo
|
|
||||||

|
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
|
### CLI
|
||||||
|
|
||||||
|
One-liner command.
|
||||||
|
Save chat data to html.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ pytchat -v ZJ6Q4U_Vg6s -o "c:/temp/"
|
||||||
|
|
||||||
|
# options:
|
||||||
|
# -v : video_id
|
||||||
|
# -o : output directory (default path: './')
|
||||||
|
# saved filename is [video_id].html
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### on-demand mode
|
### on-demand mode
|
||||||
```python
|
```python
|
||||||
from pytchat import LiveChat
|
from pytchat import LiveChat
|
||||||
|
livechat = LiveChat(video_id = "Zvp1pJpie4I")
|
||||||
|
|
||||||
chat = LiveChat("rsHWP7IjMiw")
|
while livechat.is_alive():
|
||||||
while chat.is_alive():
|
try:
|
||||||
data = chat.get()
|
chatdata = livechat.get()
|
||||||
for c in data.items:
|
for c in chatdata.items:
|
||||||
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
|
print(f"{c.datetime} [{c.author.name}]- {c.message}")
|
||||||
data.tick()
|
chatdata.tick()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
livechat.terminate()
|
||||||
|
break
|
||||||
```
|
```
|
||||||
|
|
||||||
### callback mode
|
### callback mode
|
||||||
@@ -40,17 +60,21 @@ while chat.is_alive():
|
|||||||
from pytchat import LiveChat
|
from pytchat import LiveChat
|
||||||
import time
|
import time
|
||||||
|
|
||||||
#callback function is automatically called.
|
def main():
|
||||||
def display(data):
|
livechat = LiveChat(video_id = "Zvp1pJpie4I", callback = disp)
|
||||||
for c in data.items:
|
while livechat.is_alive():
|
||||||
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
|
#other background operation.
|
||||||
data.tick()
|
time.sleep(1)
|
||||||
|
livechat.terminate()
|
||||||
|
|
||||||
|
#callback function (automatically called)
|
||||||
|
def disp(chatdata):
|
||||||
|
for c in chatdata.items:
|
||||||
|
print(f"{c.datetime} [{c.author.name}]- {c.message}")
|
||||||
|
chatdata.tick()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
chat = LiveChat("rsHWP7IjMiw", callback = display)
|
main()
|
||||||
while chat.is_alive():
|
|
||||||
#other background operation.
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -61,16 +85,16 @@ from concurrent.futures import CancelledError
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
chat = LiveChatAsync("rsHWP7IjMiw", callback = func)
|
livechat = LiveChatAsync("Zvp1pJpie4I", callback = func)
|
||||||
while chat.is_alive():
|
while livechat.is_alive():
|
||||||
#other background operation.
|
#other background operation.
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
|
||||||
#callback function is automatically called.
|
#callback function is automatically called.
|
||||||
async def func(data):
|
async def func(chatdata):
|
||||||
for c in data.items:
|
for c in chatdata.items:
|
||||||
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
|
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
|
||||||
await data.tick_async()
|
await chatdata.tick_async()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
@@ -86,18 +110,20 @@ if __name__ == '__main__':
|
|||||||
from pytchat import LiveChat, CompatibleProcessor
|
from pytchat import LiveChat, CompatibleProcessor
|
||||||
import time
|
import time
|
||||||
|
|
||||||
chat = LiveChat("rsHWP7IjMiw",
|
chat = LiveChat("Zvp1pJpie4I",
|
||||||
processor = CompatibleProcessor() )
|
processor = CompatibleProcessor() )
|
||||||
|
|
||||||
while chat.is_alive():
|
while chat.is_alive():
|
||||||
data = chat.get()
|
try:
|
||||||
polling = data['pollingIntervalMillis']/1000
|
data = chat.get()
|
||||||
for c in data['items']:
|
polling = data['pollingIntervalMillis']/1000
|
||||||
if c.get('snippet'):
|
for c in data['items']:
|
||||||
print(f"[{c['authorDetails']['displayName']}]"
|
if c.get('snippet'):
|
||||||
f"-{c['snippet']['displayMessage']}")
|
print(f"[{c['authorDetails']['displayName']}]"
|
||||||
time.sleep(polling/len(data['items']))
|
f"-{c['snippet']['displayMessage']}")
|
||||||
|
time.sleep(polling/len(data['items']))
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
chat.terminate()
|
||||||
```
|
```
|
||||||
### replay:
|
### replay:
|
||||||
If specified video is not live,
|
If specified video is not live,
|
||||||
@@ -108,19 +134,23 @@ from pytchat import LiveChat
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
#seektime (seconds): start position of chat.
|
#seektime (seconds): start position of chat.
|
||||||
chat = ReplayChat("ojes5ULOqhc", seektime = 60*30)
|
chat = LiveChat("ojes5ULOqhc", seektime = 60*30)
|
||||||
while chat.is_alive():
|
print('Replay from 30:00')
|
||||||
data = chat.get()
|
try:
|
||||||
for c in data.items:
|
while chat.is_alive():
|
||||||
print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}")
|
data = chat.get()
|
||||||
data.tick()
|
for c in data.items:
|
||||||
|
print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}")
|
||||||
|
data.tick()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
chat.terminate()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
```
|
```
|
||||||
|
|
||||||
## Structure of Default Processor
|
## Structure of Default Processor
|
||||||
Each item can be got with items() function.
|
Each item can be retrieved with the `items` function.
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<th>name</th>
|
<th>name</th>
|
||||||
@@ -246,8 +276,17 @@ Structure of author object.
|
|||||||
|
|
||||||
[](LICENSE)
|
[](LICENSE)
|
||||||
|
|
||||||
|
|
||||||
|
## Contributes
|
||||||
|
Great thanks:
|
||||||
|
|
||||||
|
Most of the CLI source code is based on:
|
||||||
|
|
||||||
|
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
|
||||||
|
|
||||||
|
|
||||||
## Author
|
## Author
|
||||||
|
|
||||||
[taizan-hokuto](https://github.com/taizan-hokuto)
|
[taizan-hokuto](https://github.com/taizan-hokuto)
|
||||||
|
|
||||||
[twitter:@taizan205](https://twitter.com/taizan205)
|
[twitter:@taizan205](https://twitter.com/taizan205)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
|
pytchat is a python library for fetching youtube live chat without using yt api, Selenium, or BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
|
||||||
__version__ = '0.0.4.4'
|
__version__ = '0.0.7.2'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__author__ = 'taizan-hokuto'
|
__author__ = 'taizan-hokuto'
|
||||||
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
|
||||||
@@ -11,16 +11,20 @@ __url__ = 'https://github.com/taizan-hokuto/pytchat'
|
|||||||
__all__ = ["core_async","core_multithread","processors"]
|
__all__ = ["core_async","core_multithread","processors"]
|
||||||
|
|
||||||
from .api import (
|
from .api import (
|
||||||
|
cli,
|
||||||
config,
|
config,
|
||||||
LiveChat,
|
LiveChat,
|
||||||
LiveChatAsync,
|
LiveChatAsync,
|
||||||
ReplayChat,
|
|
||||||
ReplayChatAsync,
|
|
||||||
ChatProcessor,
|
ChatProcessor,
|
||||||
CompatibleProcessor,
|
CompatibleProcessor,
|
||||||
DefaultProcessor,
|
DummyProcessor,
|
||||||
|
DefaultProcessor,
|
||||||
|
Extractor,
|
||||||
|
HTMLArchiver,
|
||||||
|
TSVArchiver,
|
||||||
|
JsonfileArchiver,
|
||||||
SimpleDisplayProcessor,
|
SimpleDisplayProcessor,
|
||||||
JsonfileArchiveProcessor,
|
|
||||||
SpeedCalculator,
|
SpeedCalculator,
|
||||||
DummyProcessor
|
SuperchatCalculator,
|
||||||
|
VideoInfo
|
||||||
)
|
)
|
||||||
@@ -1,12 +1,16 @@
|
|||||||
|
from . import cli
|
||||||
|
from . import config
|
||||||
from .core_multithread.livechat import LiveChat
|
from .core_multithread.livechat import LiveChat
|
||||||
from .core_async.livechat import LiveChatAsync
|
from .core_async.livechat import LiveChatAsync
|
||||||
from .core_multithread.replaychat import ReplayChat
|
|
||||||
from .core_async.replaychat import ReplayChatAsync
|
|
||||||
from .processors.chat_processor import ChatProcessor
|
from .processors.chat_processor import ChatProcessor
|
||||||
from .processors.default.processor import DefaultProcessor
|
|
||||||
from .processors.compatible.processor import CompatibleProcessor
|
from .processors.compatible.processor import CompatibleProcessor
|
||||||
from .processors.simple_display_processor import SimpleDisplayProcessor
|
from .processors.default.processor import DefaultProcessor
|
||||||
from .processors.jsonfile_archive_processor import JsonfileArchiveProcessor
|
|
||||||
from .processors.speed_calculator import SpeedCalculator
|
|
||||||
from .processors.dummy_processor import DummyProcessor
|
from .processors.dummy_processor import DummyProcessor
|
||||||
from . import config
|
from .processors.html_archiver import HTMLArchiver
|
||||||
|
from .processors.tsv_archiver import TSVArchiver
|
||||||
|
from .processors.jsonfile_archiver import JsonfileArchiver
|
||||||
|
from .processors.simple_display_processor import SimpleDisplayProcessor
|
||||||
|
from .processors.speed.calculator import SpeedCalculator
|
||||||
|
from .processors.superchat.calculator import SuperchatCalculator
|
||||||
|
from .tool.extract.extractor import Extractor
|
||||||
|
from .tool.videoinfo import VideoInfo
|
||||||
|
|||||||
60
pytchat/cli/__init__.py
Normal file
60
pytchat/cli/__init__.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Callable
|
||||||
|
from .arguments import Arguments
|
||||||
|
|
||||||
|
from .. exceptions import InvalidVideoIdException, NoContentsException
|
||||||
|
from .. processors.tsv_archiver import TSVArchiver
|
||||||
|
from .. processors.html_archiver import HTMLArchiver
|
||||||
|
from .. tool.extract.extractor import Extractor
|
||||||
|
from .. tool.videoinfo import VideoInfo
|
||||||
|
from .. import __version__
|
||||||
|
|
||||||
|
'''
|
||||||
|
Most of the CLI modules are based on
|
||||||
|
Petter Kraabøl's Twitch-Chat-Downloader
|
||||||
|
https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
||||||
|
(MIT License)
|
||||||
|
|
||||||
|
'''
|
||||||
|
def main():
|
||||||
|
# Arguments
|
||||||
|
parser = argparse.ArgumentParser(description=f'pytchat v{__version__}')
|
||||||
|
parser.add_argument('-v', f'--{Arguments.Name.VIDEO}', type=str,
|
||||||
|
help='Video IDs separated by commas without space.\n'
|
||||||
|
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
|
||||||
|
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
|
||||||
|
help='Output directory (end with "/"). default="./"', default='./')
|
||||||
|
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
|
||||||
|
help='Settings version')
|
||||||
|
Arguments(parser.parse_args().__dict__)
|
||||||
|
if Arguments().print_version:
|
||||||
|
print(f'pytchat v{__version__}')
|
||||||
|
return
|
||||||
|
|
||||||
|
# Extractor
|
||||||
|
if Arguments().video_ids:
|
||||||
|
for video_id in Arguments().video_ids:
|
||||||
|
if '[' in video_id:
|
||||||
|
video_id = video_id.replace('[','').replace(']','')
|
||||||
|
try:
|
||||||
|
info = VideoInfo(video_id)
|
||||||
|
print(f"Extracting...\n"
|
||||||
|
f" video_id: {video_id}\n"
|
||||||
|
f" channel: {info.get_channel_name()}\n"
|
||||||
|
f" title: {info.get_title()}")
|
||||||
|
path = Path(Arguments().output+video_id+'.html')
|
||||||
|
print(f"output path: {path.resolve()}")
|
||||||
|
Extractor(video_id,
|
||||||
|
processor = HTMLArchiver(Arguments().output+video_id+'.html'),
|
||||||
|
callback = _disp_progress
|
||||||
|
).extract()
|
||||||
|
print("\nExtraction end.\n")
|
||||||
|
except (InvalidVideoIdException, NoContentsException) as e:
|
||||||
|
print(e)
|
||||||
|
return
|
||||||
|
parser.print_help()
|
||||||
|
|
||||||
|
def _disp_progress(a,b):
|
||||||
|
print('.',end="",flush=True)
|
||||||
39
pytchat/cli/arguments.py
Normal file
39
pytchat/cli/arguments.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from typing import Optional, Dict, Union, List
|
||||||
|
from .singleton import Singleton
|
||||||
|
|
||||||
|
'''
|
||||||
|
This module is based on
|
||||||
|
Petter Kraabøl's Twitch-Chat-Downloader
|
||||||
|
https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
||||||
|
(MIT License)
|
||||||
|
'''
|
||||||
|
|
||||||
|
class Arguments(metaclass=Singleton):
|
||||||
|
"""
|
||||||
|
Arguments singleton
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Name:
|
||||||
|
VERSION: str = 'version'
|
||||||
|
OUTPUT: str = 'output'
|
||||||
|
VIDEO: str = 'video'
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
|
||||||
|
"""
|
||||||
|
Initialize arguments
|
||||||
|
:param arguments: Arguments from cli
|
||||||
|
(Optional to call singleton instance without parameters)
|
||||||
|
"""
|
||||||
|
|
||||||
|
if arguments is None:
|
||||||
|
print('Error: arguments were not provided')
|
||||||
|
exit()
|
||||||
|
|
||||||
|
self.print_version: bool = arguments[Arguments.Name.VERSION]
|
||||||
|
self.output: str = arguments[Arguments.Name.OUTPUT]
|
||||||
|
self.video_ids: List[int] = []
|
||||||
|
# Videos
|
||||||
|
if arguments[Arguments.Name.VIDEO]:
|
||||||
|
self.video_ids = [video_id
|
||||||
|
for video_id in arguments[Arguments.Name.VIDEO].split(',')]
|
||||||
19
pytchat/cli/singleton.py
Normal file
19
pytchat/cli/singleton.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
'''
|
||||||
|
This module is based on
|
||||||
|
Petter Kraabøl's Twitch-Chat-Downloader
|
||||||
|
https://github.com/PetterKraabol/Twitch-Chat-Downloader
|
||||||
|
(MIT License)
|
||||||
|
'''
|
||||||
|
class Singleton(type):
|
||||||
|
"""
|
||||||
|
Abstract class for singletons
|
||||||
|
"""
|
||||||
|
_instances = {}
|
||||||
|
|
||||||
|
def __call__(cls, *args, **kwargs):
|
||||||
|
if cls not in cls._instances:
|
||||||
|
cls._instances[cls] = super().__call__(*args, **kwargs)
|
||||||
|
return cls._instances[cls]
|
||||||
|
|
||||||
|
def get_instance(cls, *args, **kwargs):
|
||||||
|
cls.__call__(*args, **kwargs)
|
||||||
@@ -1,13 +1,11 @@
|
|||||||
import logging
|
import logging
|
||||||
from . import mylogger
|
from . import mylogger
|
||||||
|
|
||||||
LOGGER_MODE = None
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
|
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
|
||||||
|
|
||||||
def logger(module_name: str):
|
def logger(module_name: str, loglevel = None):
|
||||||
module_logger = mylogger.get_logger(module_name, mode = LOGGER_MODE)
|
module_logger = mylogger.get_logger(module_name, loglevel = loglevel)
|
||||||
return module_logger
|
return module_logger
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,23 @@
|
|||||||
from logging import NullHandler, getLogger, StreamHandler, FileHandler, Formatter
|
from logging import NullHandler, getLogger, StreamHandler, FileHandler, Formatter
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
def get_logger(modname,mode=logging.DEBUG):
|
def get_logger(modname,loglevel=logging.DEBUG):
|
||||||
logger = getLogger(modname)
|
logger = getLogger(modname)
|
||||||
if mode == None:
|
if loglevel == None:
|
||||||
logger.addHandler(NullHandler())
|
logger.addHandler(NullHandler())
|
||||||
return logger
|
return logger
|
||||||
logger.setLevel(mode)
|
logger.setLevel(loglevel)
|
||||||
#create handler1 for showing info
|
#create handler1 for showing info
|
||||||
handler1 = StreamHandler()
|
handler1 = StreamHandler()
|
||||||
my_formatter = MyFormatter()
|
my_formatter = MyFormatter()
|
||||||
handler1.setFormatter(my_formatter)
|
handler1.setFormatter(my_formatter)
|
||||||
|
|
||||||
handler1.setLevel(mode)
|
handler1.setLevel(loglevel)
|
||||||
logger.addHandler(handler1)
|
logger.addHandler(handler1)
|
||||||
#create handler2 for recording log file
|
#create handler2 for recording log file
|
||||||
if mode <= logging.DEBUG:
|
if loglevel <= logging.DEBUG:
|
||||||
handler2 = FileHandler(filename="log.txt", encoding='utf-8')
|
handler2 = FileHandler(filename="log.txt", encoding='utf-8')
|
||||||
handler2.setLevel(logging.ERROR)
|
handler2.setLevel(logging.ERROR)
|
||||||
handler2.setFormatter(my_formatter)
|
handler2.setFormatter(my_formatter)
|
||||||
@@ -28,5 +28,11 @@ def get_logger(modname,mode=logging.DEBUG):
|
|||||||
|
|
||||||
class MyFormatter(logging.Formatter):
|
class MyFormatter(logging.Formatter):
|
||||||
def format(self, record):
|
def format(self, record):
|
||||||
s =(datetime.datetime.fromtimestamp(record.created)).strftime("%m-%d %H:%M:%S")+'| '+ (record.module).ljust(15)+(' { '+record.funcName).ljust(20) +":"+str(record.lineno).rjust(4)+'} - '+record.getMessage()
|
timestamp = (
|
||||||
return s
|
datetime.fromtimestamp(record.created)).strftime("%m-%d %H:%M:%S")
|
||||||
|
module = (record.module).ljust(15)
|
||||||
|
funcname = (record.funcName).ljust(18)
|
||||||
|
lineno = str(record.lineno).rjust(4)
|
||||||
|
message = record.getMessage()
|
||||||
|
|
||||||
|
return timestamp+'| '+module+' { '+funcname+':'+lineno+'} - '+message
|
||||||
|
|||||||
@@ -20,6 +20,13 @@ class Buffer(asyncio.Queue):
|
|||||||
super().get_nowait()
|
super().get_nowait()
|
||||||
await super().put(item)
|
await super().put(item)
|
||||||
|
|
||||||
|
def put_nowait(self,item):
|
||||||
|
if item is None:
|
||||||
|
return
|
||||||
|
if super().full():
|
||||||
|
super().get_nowait()
|
||||||
|
super().put_nowait(item)
|
||||||
|
|
||||||
async def get(self):
|
async def get(self):
|
||||||
ret = []
|
ret = []
|
||||||
ret.append(await super().get())
|
ret.append(await super().get())
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ from ..paramgen import liveparam, arcparam
|
|||||||
from ..processors.default.processor import DefaultProcessor
|
from ..processors.default.processor import DefaultProcessor
|
||||||
from ..processors.combinator import Combinator
|
from ..processors.combinator import Combinator
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
MAX_RETRY = 10
|
MAX_RETRY = 10
|
||||||
|
|
||||||
@@ -63,6 +62,9 @@ class LiveChatAsync:
|
|||||||
force_replay : bool
|
force_replay : bool
|
||||||
Trueの場合、ライブチャットが取得できる場合であっても
|
Trueの場合、ライブチャットが取得できる場合であっても
|
||||||
強制的にアーカイブ済みチャットを取得する。
|
強制的にアーカイブ済みチャットを取得する。
|
||||||
|
|
||||||
|
topchat_only : bool
|
||||||
|
Trueの場合、上位チャットのみ取得する。
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
---------
|
---------
|
||||||
@@ -81,7 +83,9 @@ class LiveChatAsync:
|
|||||||
done_callback = None,
|
done_callback = None,
|
||||||
exception_handler = None,
|
exception_handler = None,
|
||||||
direct_mode = False,
|
direct_mode = False,
|
||||||
force_replay = False
|
force_replay = False,
|
||||||
|
topchat_only = False,
|
||||||
|
logger = config.logger(__name__),
|
||||||
):
|
):
|
||||||
self.video_id = video_id
|
self.video_id = video_id
|
||||||
self.seektime = seektime
|
self.seektime = seektime
|
||||||
@@ -102,11 +106,13 @@ class LiveChatAsync:
|
|||||||
self._setup()
|
self._setup()
|
||||||
self._first_fetch = True
|
self._first_fetch = True
|
||||||
self._fetch_url = "live_chat/get_live_chat?continuation="
|
self._fetch_url = "live_chat/get_live_chat?continuation="
|
||||||
|
self._topchat_only = topchat_only
|
||||||
|
self._logger = logger
|
||||||
|
LiveChatAsync._logger = logger
|
||||||
|
|
||||||
if not LiveChatAsync._setup_finished:
|
if not LiveChatAsync._setup_finished:
|
||||||
LiveChatAsync._setup_finished = True
|
LiveChatAsync._setup_finished = True
|
||||||
if exception_handler == None:
|
if exception_handler:
|
||||||
self._set_exception_handler(self._handle_exception)
|
|
||||||
else:
|
|
||||||
self._set_exception_handler(exception_handler)
|
self._set_exception_handler(exception_handler)
|
||||||
if interruptable:
|
if interruptable:
|
||||||
signal.signal(signal.SIGINT,
|
signal.signal(signal.SIGINT,
|
||||||
@@ -172,24 +178,24 @@ class LiveChatAsync:
|
|||||||
}
|
}
|
||||||
time_mark =time.time()
|
time_mark =time.time()
|
||||||
if self._direct_mode:
|
if self._direct_mode:
|
||||||
await self._callback(
|
processed_chat = self.processor.process([chat_component])
|
||||||
self.processor.process([chat_component])
|
if isinstance(processed_chat,tuple):
|
||||||
)
|
await self._callback(*processed_chat)
|
||||||
|
else:
|
||||||
|
await self._callback(processed_chat)
|
||||||
else:
|
else:
|
||||||
await self._buffer.put(chat_component)
|
await self._buffer.put(chat_component)
|
||||||
diff_time = timeout - (time.time()-time_mark)
|
diff_time = timeout - (time.time()-time_mark)
|
||||||
await asyncio.sleep(diff_time)
|
await asyncio.sleep(diff_time)
|
||||||
continuation = metadata.get('continuation')
|
continuation = metadata.get('continuation')
|
||||||
except ChatParseException as e:
|
except ChatParseException as e:
|
||||||
#self.terminate()
|
self._logger.debug(f"[{self.video_id}]{str(e)}")
|
||||||
logger.debug(f"[{self.video_id}]{str(e)}")
|
|
||||||
return
|
return
|
||||||
except (TypeError , json.JSONDecodeError) :
|
except (TypeError , json.JSONDecodeError) :
|
||||||
#self.terminate()
|
self._logger.error(f"{traceback.format_exc(limit = -1)}")
|
||||||
logger.error(f"{traceback.format_exc(limit = -1)}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.debug(f"[{self.video_id}]finished fetching chat.")
|
self._logger.debug(f"[{self.video_id}]finished fetching chat.")
|
||||||
|
|
||||||
async def _check_pause(self, continuation):
|
async def _check_pause(self, continuation):
|
||||||
if self._pauser.empty():
|
if self._pauser.empty():
|
||||||
@@ -200,17 +206,18 @@ class LiveChatAsync:
|
|||||||
'''
|
'''
|
||||||
self._pauser.put_nowait(None)
|
self._pauser.put_nowait(None)
|
||||||
if not self._is_replay:
|
if not self._is_replay:
|
||||||
continuation = liveparam.getparam(self.video_id,3)
|
continuation = liveparam.getparam(
|
||||||
|
self.video_id, 3, self._topchat_only)
|
||||||
return continuation
|
return continuation
|
||||||
|
|
||||||
async def _get_contents(self, continuation, session, headers):
|
async def _get_contents(self, continuation, session, headers):
|
||||||
'''Get 'contents' dict from livechat json.
|
'''Get 'continuationContents' from livechat json.
|
||||||
If contents is None at first fetching,
|
If contents is None at first fetching,
|
||||||
try to fetch archive chat data.
|
try to fetch archive chat data.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
-------
|
-------
|
||||||
'contents' dict which includes metadata & chatdata.
|
'continuationContents' which includes metadata & chatdata.
|
||||||
'''
|
'''
|
||||||
livechat_json = (await
|
livechat_json = (await
|
||||||
self._get_livechat_json(continuation, session, headers)
|
self._get_livechat_json(continuation, session, headers)
|
||||||
@@ -220,12 +227,18 @@ class LiveChatAsync:
|
|||||||
if contents is None or self._is_replay:
|
if contents is None or self._is_replay:
|
||||||
'''Try to fetch archive chat data.'''
|
'''Try to fetch archive chat data.'''
|
||||||
self._parser.is_replay = True
|
self._parser.is_replay = True
|
||||||
self._fetch_url = ("live_chat_replay/"
|
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||||
"get_live_chat_replay?continuation=")
|
continuation = arcparam.getparam(
|
||||||
continuation = arcparam.getparam(self.video_id, self.seektime)
|
self.video_id, self.seektime, self._topchat_only)
|
||||||
livechat_json = (await self._get_livechat_json(
|
livechat_json = (await self._get_livechat_json(
|
||||||
continuation, session, headers))
|
continuation, session, headers))
|
||||||
|
reload_continuation = self._parser.reload_continuation(
|
||||||
|
self._parser.get_contents(livechat_json))
|
||||||
|
if reload_continuation:
|
||||||
|
livechat_json = (await self._get_livechat_json(
|
||||||
|
reload_continuation, session, headers))
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
|
self._is_replay = True
|
||||||
self._first_fetch = False
|
self._first_fetch = False
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
@@ -236,8 +249,7 @@ class LiveChatAsync:
|
|||||||
continuation = urllib.parse.quote(continuation)
|
continuation = urllib.parse.quote(continuation)
|
||||||
livechat_json = None
|
livechat_json = None
|
||||||
status_code = 0
|
status_code = 0
|
||||||
url =(
|
url =f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||||
f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1")
|
|
||||||
for _ in range(MAX_RETRY + 1):
|
for _ in range(MAX_RETRY + 1):
|
||||||
async with session.get(url ,headers = headers) as resp:
|
async with session.get(url ,headers = headers) as resp:
|
||||||
try:
|
try:
|
||||||
@@ -248,7 +260,7 @@ class LiveChatAsync:
|
|||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
logger.error(f"[{self.video_id}]"
|
self._logger.error(f"[{self.video_id}]"
|
||||||
f"Exceeded retry count. status_code={status_code}")
|
f"Exceeded retry count. status_code={status_code}")
|
||||||
return None
|
return None
|
||||||
return livechat_json
|
return livechat_json
|
||||||
@@ -264,8 +276,11 @@ class LiveChatAsync:
|
|||||||
"""
|
"""
|
||||||
while self.is_alive():
|
while self.is_alive():
|
||||||
items = await self._buffer.get()
|
items = await self._buffer.get()
|
||||||
data = self.processor.process(items)
|
processed_chat = self.processor.process(items)
|
||||||
await callback(data)
|
if isinstance(processed_chat, tuple):
|
||||||
|
await self._callback(*processed_chat)
|
||||||
|
else:
|
||||||
|
await self._callback(processed_chat)
|
||||||
|
|
||||||
async def get(self):
|
async def get(self):
|
||||||
""" bufferからデータを取り出し、processorに投げ、
|
""" bufferからデータを取り出し、processorに投げ、
|
||||||
@@ -303,7 +318,7 @@ class LiveChatAsync:
|
|||||||
try:
|
try:
|
||||||
self.terminate()
|
self.terminate()
|
||||||
except CancelledError:
|
except CancelledError:
|
||||||
logger.debug(f'[{self.video_id}]cancelled:{sender}')
|
self._logger.debug(f'[{self.video_id}]cancelled:{sender}')
|
||||||
|
|
||||||
def terminate(self):
|
def terminate(self):
|
||||||
'''
|
'''
|
||||||
@@ -313,28 +328,21 @@ class LiveChatAsync:
|
|||||||
if self._direct_mode == False:
|
if self._direct_mode == False:
|
||||||
#bufferにダミーオブジェクトを入れてis_alive()を判定させる
|
#bufferにダミーオブジェクトを入れてis_alive()を判定させる
|
||||||
self._buffer.put_nowait({'chatdata':'','timeout':0})
|
self._buffer.put_nowait({'chatdata':'','timeout':0})
|
||||||
logger.info(f'[{self.video_id}]finished.')
|
self._logger.info(f'[{self.video_id}]finished.')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _set_exception_handler(cls, handler):
|
def _set_exception_handler(cls, handler):
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.set_exception_handler(handler)
|
loop.set_exception_handler(handler)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _handle_exception(cls, loop, context):
|
|
||||||
if not isinstance(context["exception"],CancelledError):
|
|
||||||
logger.error(f"Caught exception: {context}")
|
|
||||||
loop= asyncio.get_event_loop()
|
|
||||||
loop.create_task(cls.shutdown(None,None,None))
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def shutdown(cls, event, sig = None, handler=None):
|
async def shutdown(cls, event, sig = None, handler=None):
|
||||||
logger.debug("shutdown...")
|
cls._logger.debug("shutdown...")
|
||||||
tasks = [t for t in asyncio.all_tasks() if t is not
|
tasks = [t for t in asyncio.all_tasks() if t is not
|
||||||
asyncio.current_task()]
|
asyncio.current_task()]
|
||||||
[task.cancel() for task in tasks]
|
[task.cancel() for task in tasks]
|
||||||
|
|
||||||
logger.debug(f"complete remaining tasks...")
|
cls._logger.debug(f"complete remaining tasks...")
|
||||||
await asyncio.gather(*tasks,return_exceptions=True)
|
await asyncio.gather(*tasks,return_exceptions=True)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.stop()
|
loop.stop()
|
||||||
@@ -1,317 +0,0 @@
|
|||||||
import aiohttp, asyncio
|
|
||||||
import datetime
|
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import signal
|
|
||||||
import time
|
|
||||||
import traceback
|
|
||||||
import urllib.parse
|
|
||||||
import warnings
|
|
||||||
from aiohttp.client_exceptions import ClientConnectorError
|
|
||||||
from concurrent.futures import CancelledError
|
|
||||||
from asyncio import Queue
|
|
||||||
from .buffer import Buffer
|
|
||||||
from ..parser.replay import Parser
|
|
||||||
from .. import config
|
|
||||||
from ..exceptions import ChatParseException,IllegalFunctionCall
|
|
||||||
from ..paramgen import arcparam
|
|
||||||
from ..processors.default.processor import DefaultProcessor
|
|
||||||
from ..processors.combinator import Combinator
|
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
headers = config.headers
|
|
||||||
MAX_RETRY = 10
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ReplayChatAsync:
|
|
||||||
'''
|
|
||||||
### -----------------------------------------------------------
|
|
||||||
### [Warning] ReplayChatAsync is integrated into LiveChatAsync.
|
|
||||||
### This class is deprecated and will be removed at v0.0.5.0.
|
|
||||||
### ReplayChatAsyncはLiveChatAsyncに統合しました。
|
|
||||||
### このクラスはv0.0.5.0で廃止予定です。
|
|
||||||
### -----------------------------------------------------------
|
|
||||||
|
|
||||||
asyncio(aiohttp)を利用してYouTubeのチャットデータを取得する。
|
|
||||||
|
|
||||||
Parameter
|
|
||||||
---------
|
|
||||||
video_id : str
|
|
||||||
動画ID
|
|
||||||
|
|
||||||
seektime : int
|
|
||||||
リプレイするチャットデータの開始時間(秒)
|
|
||||||
|
|
||||||
processor : ChatProcessor
|
|
||||||
チャットデータを加工するオブジェクト
|
|
||||||
|
|
||||||
buffer : Buffer(maxsize:20[default])
|
|
||||||
チャットデータchat_componentを格納するバッファ。
|
|
||||||
maxsize : 格納できるchat_componentの個数
|
|
||||||
default値20個。1個で約5~10秒分。
|
|
||||||
|
|
||||||
interruptable : bool
|
|
||||||
Ctrl+Cによる処理中断を行うかどうか。
|
|
||||||
|
|
||||||
callback : func
|
|
||||||
_listen()関数から一定間隔で自動的に呼びだす関数。
|
|
||||||
|
|
||||||
done_callback : func
|
|
||||||
listener終了時に呼び出すコールバック。
|
|
||||||
|
|
||||||
exception_handler : func
|
|
||||||
例外を処理する関数
|
|
||||||
|
|
||||||
direct_mode : bool
|
|
||||||
Trueの場合、bufferを使わずにcallbackを呼ぶ。
|
|
||||||
Trueの場合、callbackの設定が必須
|
|
||||||
(設定していない場合IllegalFunctionCall例外を発生させる)
|
|
||||||
|
|
||||||
Attributes
|
|
||||||
---------
|
|
||||||
_is_alive : bool
|
|
||||||
チャット取得を停止するためのフラグ
|
|
||||||
'''
|
|
||||||
|
|
||||||
_setup_finished = False
|
|
||||||
|
|
||||||
def __init__(self, video_id,
|
|
||||||
seektime = 0,
|
|
||||||
processor = DefaultProcessor(),
|
|
||||||
buffer = None,
|
|
||||||
interruptable = True,
|
|
||||||
callback = None,
|
|
||||||
done_callback = None,
|
|
||||||
exception_handler = None,
|
|
||||||
direct_mode = False):
|
|
||||||
|
|
||||||
warnings.warn(""
|
|
||||||
f"\n{'-'*60}\n[WARNING] ReplayChatAsync is integrated "
|
|
||||||
f"into LiveChatAsync.\n{' '*5} This is deprecated and will"
|
|
||||||
f" be removed at v0.0.5.0.\n{'-'*60}\n"
|
|
||||||
)
|
|
||||||
self.video_id = video_id
|
|
||||||
self.seektime = seektime
|
|
||||||
if isinstance(processor, tuple):
|
|
||||||
self.processor = Combinator(processor)
|
|
||||||
else:
|
|
||||||
self.processor = processor
|
|
||||||
self._buffer = buffer
|
|
||||||
self._callback = callback
|
|
||||||
self._done_callback = done_callback
|
|
||||||
self._exception_handler = exception_handler
|
|
||||||
self._direct_mode = direct_mode
|
|
||||||
self._is_alive = True
|
|
||||||
self._parser = Parser()
|
|
||||||
self._pauser = Queue()
|
|
||||||
self._pauser.put_nowait(None)
|
|
||||||
self._setup()
|
|
||||||
|
|
||||||
if not ReplayChatAsync._setup_finished:
|
|
||||||
ReplayChatAsync._setup_finished = True
|
|
||||||
if exception_handler == None:
|
|
||||||
self._set_exception_handler(self._handle_exception)
|
|
||||||
else:
|
|
||||||
self._set_exception_handler(exception_handler)
|
|
||||||
if interruptable:
|
|
||||||
signal.signal(signal.SIGINT,
|
|
||||||
(lambda a, b:asyncio.create_task(
|
|
||||||
ReplayChatAsync.shutdown(None,signal.SIGINT,b))
|
|
||||||
))
|
|
||||||
|
|
||||||
def _setup(self):
|
|
||||||
#direct modeがTrueでcallback未設定の場合例外発生。
|
|
||||||
if self._direct_mode:
|
|
||||||
if self._callback is None:
|
|
||||||
raise IllegalFunctionCall(
|
|
||||||
"direct_mode=Trueの場合callbackの設定が必須です。")
|
|
||||||
else:
|
|
||||||
#direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
|
|
||||||
if self._buffer is None:
|
|
||||||
self._buffer = Buffer(maxsize = 20)
|
|
||||||
#callbackが指定されている場合はcallbackを呼ぶループタスクを作成
|
|
||||||
if self._callback is None:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
#callbackを呼ぶループタスクの開始
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.create_task(self._callback_loop(self._callback))
|
|
||||||
#_listenループタスクの開始
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
listen_task = loop.create_task(self._startlisten())
|
|
||||||
#add_done_callbackの登録
|
|
||||||
if self._done_callback is None:
|
|
||||||
listen_task.add_done_callback(self.finish)
|
|
||||||
else:
|
|
||||||
listen_task.add_done_callback(self._done_callback)
|
|
||||||
|
|
||||||
async def _startlisten(self):
|
|
||||||
"""最初のcontinuationパラメータを取得し、
|
|
||||||
_listenループのタスクを作成し開始する
|
|
||||||
"""
|
|
||||||
initial_continuation = arcparam.getparam(self.video_id, self.seektime)
|
|
||||||
await self._listen(initial_continuation)
|
|
||||||
|
|
||||||
async def _listen(self, continuation):
|
|
||||||
''' continuationに紐付いたチャットデータを取得し
|
|
||||||
Bufferにチャットデータを格納、
|
|
||||||
次のcontinuaitonを取得してループする。
|
|
||||||
|
|
||||||
Parameter
|
|
||||||
---------
|
|
||||||
continuation : str
|
|
||||||
次のチャットデータ取得に必要なパラメータ
|
|
||||||
'''
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
while(continuation and self._is_alive):
|
|
||||||
if self._pauser.empty():
|
|
||||||
'''pause'''
|
|
||||||
await self._pauser.get()
|
|
||||||
'''resume:
|
|
||||||
prohibit from blocking by putting None into _pauser.
|
|
||||||
'''
|
|
||||||
self._pauser.put_nowait(None)
|
|
||||||
#when replay, not reacquire continuation param
|
|
||||||
livechat_json = (await
|
|
||||||
self._get_livechat_json(continuation, session, headers)
|
|
||||||
)
|
|
||||||
metadata, chatdata = self._parser.parse( livechat_json )
|
|
||||||
timeout = metadata['timeoutMs']/1000
|
|
||||||
chat_component = {
|
|
||||||
"video_id" : self.video_id,
|
|
||||||
"timeout" : timeout,
|
|
||||||
"chatdata" : chatdata
|
|
||||||
}
|
|
||||||
time_mark =time.time()
|
|
||||||
if self._direct_mode:
|
|
||||||
await self._callback(
|
|
||||||
self.processor.process([chat_component])
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await self._buffer.put(chat_component)
|
|
||||||
diff_time = timeout - (time.time()-time_mark)
|
|
||||||
await asyncio.sleep(diff_time)
|
|
||||||
continuation = metadata.get('continuation')
|
|
||||||
except ChatParseException as e:
|
|
||||||
self.terminate()
|
|
||||||
logger.error(f"{str(e)}(video_id:\"{self.video_id}\")")
|
|
||||||
return
|
|
||||||
except (TypeError , json.JSONDecodeError) :
|
|
||||||
self.terminate()
|
|
||||||
logger.error(f"{traceback.format_exc(limit = -1)}")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.debug(f"[{self.video_id}]チャット取得を終了しました。")
|
|
||||||
self.terminate()
|
|
||||||
|
|
||||||
async def _get_livechat_json(self, continuation, session, headers):
|
|
||||||
'''
|
|
||||||
チャットデータが格納されたjsonデータを取得する。
|
|
||||||
'''
|
|
||||||
continuation = urllib.parse.quote(continuation)
|
|
||||||
livechat_json = None
|
|
||||||
status_code = 0
|
|
||||||
url =(
|
|
||||||
f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?"
|
|
||||||
f"continuation={continuation}&pbj=1")
|
|
||||||
for _ in range(MAX_RETRY + 1):
|
|
||||||
async with session.get(url ,headers = headers) as resp:
|
|
||||||
try:
|
|
||||||
text = await resp.text()
|
|
||||||
status_code = resp.status
|
|
||||||
livechat_json = json.loads(text)
|
|
||||||
break
|
|
||||||
except (ClientConnectorError,json.JSONDecodeError) :
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
logger.error(f"[{self.video_id}]"
|
|
||||||
f"Exceeded retry count. status_code={status_code}")
|
|
||||||
return None
|
|
||||||
return livechat_json
|
|
||||||
|
|
||||||
async def _callback_loop(self,callback):
|
|
||||||
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
|
|
||||||
callbackに指定された関数に一定間隔でチャットデータを投げる。
|
|
||||||
|
|
||||||
Parameter
|
|
||||||
---------
|
|
||||||
callback : func
|
|
||||||
加工済みのチャットデータを渡す先の関数。
|
|
||||||
"""
|
|
||||||
while self.is_alive():
|
|
||||||
items = await self._buffer.get()
|
|
||||||
data = self.processor.process(items)
|
|
||||||
await callback(data)
|
|
||||||
|
|
||||||
async def get(self):
|
|
||||||
""" bufferからデータを取り出し、processorに投げ、
|
|
||||||
加工済みのチャットデータを返す。
|
|
||||||
|
|
||||||
Returns
|
|
||||||
: Processorによって加工されたチャットデータ
|
|
||||||
"""
|
|
||||||
if self._callback is None:
|
|
||||||
items = await self._buffer.get()
|
|
||||||
return self.processor.process(items)
|
|
||||||
raise IllegalFunctionCall(
|
|
||||||
"既にcallbackを登録済みのため、get()は実行できません。")
|
|
||||||
|
|
||||||
def pause(self):
|
|
||||||
if self._callback is None:
|
|
||||||
return
|
|
||||||
if not self._pauser.empty():
|
|
||||||
self._pauser.get_nowait()
|
|
||||||
|
|
||||||
def resume(self):
|
|
||||||
if self._callback is None:
|
|
||||||
return
|
|
||||||
if self._pauser.empty():
|
|
||||||
self._pauser.put_nowait(None)
|
|
||||||
|
|
||||||
def is_alive(self):
|
|
||||||
return self._is_alive
|
|
||||||
|
|
||||||
def finish(self,sender):
|
|
||||||
'''Listener終了時のコールバック'''
|
|
||||||
try:
|
|
||||||
self.terminate()
|
|
||||||
except CancelledError:
|
|
||||||
logger.debug(f'[{self.video_id}]cancelled:{sender}')
|
|
||||||
|
|
||||||
def terminate(self):
|
|
||||||
'''
|
|
||||||
Listenerを終了する。
|
|
||||||
'''
|
|
||||||
self._is_alive = False
|
|
||||||
if self._direct_mode == False:
|
|
||||||
#bufferにダミーオブジェクトを入れてis_alive()を判定させる
|
|
||||||
self._buffer.put_nowait({'chatdata':'','timeout':1})
|
|
||||||
logger.info(f'[{self.video_id}]終了しました')
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _set_exception_handler(cls, handler):
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.set_exception_handler(handler)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _handle_exception(cls, loop, context):
|
|
||||||
if not isinstance(context["exception"],CancelledError):
|
|
||||||
logger.error(f"Caught exception: {context}")
|
|
||||||
loop= asyncio.get_event_loop()
|
|
||||||
loop.create_task(cls.shutdown(None,None,None))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
async def shutdown(cls, event, sig = None, handler=None):
|
|
||||||
logger.debug("シャットダウンしています")
|
|
||||||
tasks = [t for t in asyncio.all_tasks() if t is not
|
|
||||||
asyncio.current_task()]
|
|
||||||
[task.cancel() for task in tasks]
|
|
||||||
|
|
||||||
logger.debug(f"残っているタスクを終了しています")
|
|
||||||
await asyncio.gather(*tasks,return_exceptions=True)
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.stop()
|
|
||||||
@@ -22,7 +22,14 @@ class Buffer(queue.Queue):
|
|||||||
else:
|
else:
|
||||||
super().put(item)
|
super().put(item)
|
||||||
|
|
||||||
|
def put_nowait(self,item):
|
||||||
|
if item is None:
|
||||||
|
return
|
||||||
|
if super().full():
|
||||||
|
super().get_nowait()
|
||||||
|
else:
|
||||||
|
super().put_nowait(item)
|
||||||
|
|
||||||
def get(self):
|
def get(self):
|
||||||
ret = []
|
ret = []
|
||||||
ret.append(super().get())
|
ret.append(super().get())
|
||||||
|
|||||||
@@ -11,12 +11,11 @@ from queue import Queue
|
|||||||
from .buffer import Buffer
|
from .buffer import Buffer
|
||||||
from ..parser.live import Parser
|
from ..parser.live import Parser
|
||||||
from .. import config
|
from .. import config
|
||||||
from ..exceptions import ChatParseException,IllegalFunctionCall
|
from ..exceptions import ChatParseException, IllegalFunctionCall
|
||||||
from ..paramgen import liveparam, arcparam
|
from ..paramgen import liveparam, arcparam
|
||||||
from ..processors.default.processor import DefaultProcessor
|
from ..processors.default.processor import DefaultProcessor
|
||||||
from ..processors.combinator import Combinator
|
from ..processors.combinator import Combinator
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
headers = config.headers
|
headers = config.headers
|
||||||
MAX_RETRY = 10
|
MAX_RETRY = 10
|
||||||
|
|
||||||
@@ -28,7 +27,7 @@ class LiveChat:
|
|||||||
---------
|
---------
|
||||||
video_id : str
|
video_id : str
|
||||||
動画ID
|
動画ID
|
||||||
|
|
||||||
seektime : int
|
seektime : int
|
||||||
(ライブチャット取得時は無視)
|
(ライブチャット取得時は無視)
|
||||||
取得開始するアーカイブ済みチャットの経過時間(秒)
|
取得開始するアーカイブ済みチャットの経過時間(秒)
|
||||||
@@ -60,6 +59,9 @@ class LiveChat:
|
|||||||
Trueの場合、ライブチャットが取得できる場合であっても
|
Trueの場合、ライブチャットが取得できる場合であっても
|
||||||
強制的にアーカイブ済みチャットを取得する。
|
強制的にアーカイブ済みチャットを取得する。
|
||||||
|
|
||||||
|
topchat_only : bool
|
||||||
|
Trueの場合、上位チャットのみ取得する。
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
---------
|
---------
|
||||||
_executor : ThreadPoolExecutor
|
_executor : ThreadPoolExecutor
|
||||||
@@ -70,19 +72,20 @@ class LiveChat:
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
_setup_finished = False
|
_setup_finished = False
|
||||||
#チャット監視中のListenerのリスト
|
|
||||||
_listeners= []
|
|
||||||
def __init__(self, video_id,
|
def __init__(self, video_id,
|
||||||
seektime = 0,
|
seektime=0,
|
||||||
processor = DefaultProcessor(),
|
processor=DefaultProcessor(),
|
||||||
buffer = None,
|
buffer=None,
|
||||||
interruptable = True,
|
interruptable=True,
|
||||||
callback = None,
|
callback=None,
|
||||||
done_callback = None,
|
done_callback=None,
|
||||||
direct_mode = False,
|
direct_mode=False,
|
||||||
force_replay = False
|
force_replay=False,
|
||||||
):
|
topchat_only=False,
|
||||||
self.video_id = video_id
|
logger=config.logger(__name__)
|
||||||
|
):
|
||||||
|
self.video_id = video_id
|
||||||
self.seektime = seektime
|
self.seektime = seektime
|
||||||
if isinstance(processor, tuple):
|
if isinstance(processor, tuple):
|
||||||
self.processor = Combinator(processor)
|
self.processor = Combinator(processor)
|
||||||
@@ -93,56 +96,51 @@ class LiveChat:
|
|||||||
self._done_callback = done_callback
|
self._done_callback = done_callback
|
||||||
self._executor = ThreadPoolExecutor(max_workers=2)
|
self._executor = ThreadPoolExecutor(max_workers=2)
|
||||||
self._direct_mode = direct_mode
|
self._direct_mode = direct_mode
|
||||||
self._is_alive = True
|
self._is_alive = True
|
||||||
self._is_replay = force_replay
|
self._is_replay = force_replay
|
||||||
self._parser = Parser(is_replay = self._is_replay)
|
self._parser = Parser(is_replay=self._is_replay)
|
||||||
self._pauser = Queue()
|
self._pauser = Queue()
|
||||||
self._pauser.put_nowait(None)
|
self._pauser.put_nowait(None)
|
||||||
self._setup()
|
|
||||||
self._first_fetch = True
|
self._first_fetch = True
|
||||||
self._fetch_url = "live_chat/get_live_chat?continuation="
|
self._fetch_url = "live_chat/get_live_chat?continuation="
|
||||||
|
self._topchat_only = topchat_only
|
||||||
if not LiveChat._setup_finished:
|
self._logger = logger
|
||||||
LiveChat._setup_finished = True
|
if interruptable:
|
||||||
if interruptable:
|
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
|
||||||
signal.signal(signal.SIGINT, (lambda a, b:
|
self._setup()
|
||||||
(LiveChat.shutdown(None,signal.SIGINT,b))
|
|
||||||
))
|
|
||||||
LiveChat._listeners.append(self)
|
|
||||||
|
|
||||||
def _setup(self):
|
def _setup(self):
|
||||||
#logger.debug("setup")
|
# direct modeがTrueでcallback未設定の場合例外発生。
|
||||||
#direct modeがTrueでcallback未設定の場合例外発生。
|
|
||||||
if self._direct_mode:
|
if self._direct_mode:
|
||||||
if self._callback is None:
|
if self._callback is None:
|
||||||
raise IllegalFunctionCall(
|
raise IllegalFunctionCall(
|
||||||
"When direct_mode=True, callback parameter is required.")
|
"When direct_mode=True, callback parameter is required.")
|
||||||
else:
|
else:
|
||||||
#direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
|
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
|
||||||
if self._buffer is None:
|
if self._buffer is None:
|
||||||
self._buffer = Buffer(maxsize = 20)
|
self._buffer = Buffer(maxsize=20)
|
||||||
#callbackが指定されている場合はcallbackを呼ぶループタスクを作成
|
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成
|
||||||
if self._callback is None:
|
if self._callback is None:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
#callbackを呼ぶループタスクの開始
|
# callbackを呼ぶループタスクの開始
|
||||||
self._executor.submit(self._callback_loop,self._callback)
|
self._executor.submit(self._callback_loop, self._callback)
|
||||||
#_listenループタスクの開始
|
# _listenループタスクの開始
|
||||||
listen_task = self._executor.submit(self._startlisten)
|
listen_task = self._executor.submit(self._startlisten)
|
||||||
#add_done_callbackの登録
|
# add_done_callbackの登録
|
||||||
if self._done_callback is None:
|
if self._done_callback is None:
|
||||||
listen_task.add_done_callback(self.finish)
|
listen_task.add_done_callback(self.finish)
|
||||||
else:
|
else:
|
||||||
listen_task.add_done_callback(self._done_callback)
|
listen_task.add_done_callback(self._done_callback)
|
||||||
|
|
||||||
def _startlisten(self):
|
def _startlisten(self):
|
||||||
time.sleep(0.1) #sleep shortly to prohibit skipping fetching data
|
time.sleep(0.1) # sleep shortly to prohibit skipping fetching data
|
||||||
"""Fetch first continuation parameter,
|
"""Fetch first continuation parameter,
|
||||||
create and start _listen loop.
|
create and start _listen loop.
|
||||||
"""
|
"""
|
||||||
initial_continuation = liveparam.getparam(self.video_id,3)
|
initial_continuation = liveparam.getparam(self.video_id, 3)
|
||||||
self._listen(initial_continuation)
|
self._listen(initial_continuation)
|
||||||
|
|
||||||
def _listen(self, continuation):
|
def _listen(self, continuation):
|
||||||
''' Fetch chat data and store them into buffer,
|
''' Fetch chat data and store them into buffer,
|
||||||
get next continuaiton parameter and loop.
|
get next continuaiton parameter and loop.
|
||||||
@@ -158,32 +156,35 @@ class LiveChat:
|
|||||||
continuation = self._check_pause(continuation)
|
continuation = self._check_pause(continuation)
|
||||||
contents = self._get_contents(
|
contents = self._get_contents(
|
||||||
continuation, session, headers)
|
continuation, session, headers)
|
||||||
metadata, chatdata = self._parser.parse(contents)
|
metadata, chatdata = self._parser.parse(contents)
|
||||||
|
|
||||||
timeout = metadata['timeoutMs']/1000
|
timeout = metadata['timeoutMs']/1000
|
||||||
chat_component = {
|
chat_component = {
|
||||||
"video_id" : self.video_id,
|
"video_id": self.video_id,
|
||||||
"timeout" : timeout,
|
"timeout": timeout,
|
||||||
"chatdata" : chatdata
|
"chatdata": chatdata
|
||||||
}
|
}
|
||||||
time_mark =time.time()
|
time_mark = time.time()
|
||||||
if self._direct_mode:
|
if self._direct_mode:
|
||||||
self._callback(
|
processed_chat = self.processor.process(
|
||||||
self.processor.process([chat_component])
|
[chat_component])
|
||||||
)
|
if isinstance(processed_chat, tuple):
|
||||||
|
self._callback(*processed_chat)
|
||||||
|
else:
|
||||||
|
self._callback(processed_chat)
|
||||||
else:
|
else:
|
||||||
self._buffer.put(chat_component)
|
self._buffer.put(chat_component)
|
||||||
diff_time = timeout - (time.time()-time_mark)
|
diff_time = timeout - (time.time()-time_mark)
|
||||||
time.sleep(diff_time if diff_time > 0 else 0)
|
time.sleep(diff_time if diff_time > 0 else 0)
|
||||||
continuation = metadata.get('continuation')
|
continuation = metadata.get('continuation')
|
||||||
except ChatParseException as e:
|
except ChatParseException as e:
|
||||||
logger.debug(f"[{self.video_id}]{str(e)}")
|
self._logger.debug(f"[{self.video_id}]{str(e)}")
|
||||||
return
|
|
||||||
except (TypeError , json.JSONDecodeError) :
|
|
||||||
logger.error(f"{traceback.format_exc(limit = -1)}")
|
|
||||||
return
|
return
|
||||||
|
except (TypeError, json.JSONDecodeError):
|
||||||
logger.debug(f"[{self.video_id}]finished fetching chat.")
|
self._logger.error(f"{traceback.format_exc(limit = -1)}")
|
||||||
|
return
|
||||||
|
|
||||||
|
self._logger.debug(f"[{self.video_id}]finished fetching chat.")
|
||||||
|
|
||||||
def _check_pause(self, continuation):
|
def _check_pause(self, continuation):
|
||||||
if self._pauser.empty():
|
if self._pauser.empty():
|
||||||
@@ -194,19 +195,19 @@ class LiveChat:
|
|||||||
'''
|
'''
|
||||||
self._pauser.put_nowait(None)
|
self._pauser.put_nowait(None)
|
||||||
if not self._is_replay:
|
if not self._is_replay:
|
||||||
continuation = liveparam.getparam(self.video_id,3)
|
continuation = liveparam.getparam(self.video_id, 3)
|
||||||
return continuation
|
return continuation
|
||||||
|
|
||||||
def _get_contents(self, continuation, session, headers):
|
def _get_contents(self, continuation, session, headers):
|
||||||
'''Get 'contents' dict from livechat json.
|
'''Get 'continuationContents' from livechat json.
|
||||||
If contents is None at first fetching,
|
If contents is None at first fetching,
|
||||||
try to fetch archive chat data.
|
try to fetch archive chat data.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
-------
|
-------
|
||||||
'contents' dict which includes metadata & chatdata.
|
'continuationContents' which includes metadata & chat data.
|
||||||
'''
|
'''
|
||||||
livechat_json = (
|
livechat_json = (
|
||||||
self._get_livechat_json(continuation, session, headers)
|
self._get_livechat_json(continuation, session, headers)
|
||||||
)
|
)
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
@@ -214,12 +215,18 @@ class LiveChat:
|
|||||||
if contents is None or self._is_replay:
|
if contents is None or self._is_replay:
|
||||||
'''Try to fetch archive chat data.'''
|
'''Try to fetch archive chat data.'''
|
||||||
self._parser.is_replay = True
|
self._parser.is_replay = True
|
||||||
self._fetch_url = ("live_chat_replay/"
|
self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
|
||||||
"get_live_chat_replay?continuation=")
|
continuation = arcparam.getparam(
|
||||||
continuation = arcparam.getparam(self.video_id, self.seektime)
|
self.video_id, self.seektime, self._topchat_only)
|
||||||
livechat_json = ( self._get_livechat_json(
|
livechat_json = (self._get_livechat_json(
|
||||||
continuation, session, headers))
|
continuation, session, headers))
|
||||||
|
reload_continuation = self._parser.reload_continuation(
|
||||||
|
self._parser.get_contents(livechat_json))
|
||||||
|
if reload_continuation:
|
||||||
|
livechat_json = (self._get_livechat_json(
|
||||||
|
reload_continuation, session, headers))
|
||||||
contents = self._parser.get_contents(livechat_json)
|
contents = self._parser.get_contents(livechat_json)
|
||||||
|
self._is_replay = True
|
||||||
self._first_fetch = False
|
self._first_fetch = False
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
@@ -230,27 +237,26 @@ class LiveChat:
|
|||||||
continuation = urllib.parse.quote(continuation)
|
continuation = urllib.parse.quote(continuation)
|
||||||
livechat_json = None
|
livechat_json = None
|
||||||
status_code = 0
|
status_code = 0
|
||||||
url =(
|
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
|
||||||
f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1")
|
|
||||||
for _ in range(MAX_RETRY + 1):
|
for _ in range(MAX_RETRY + 1):
|
||||||
with session.get(url ,headers = headers) as resp:
|
with session.get(url, headers=headers) as resp:
|
||||||
try:
|
try:
|
||||||
text = resp.text
|
text = resp.text
|
||||||
livechat_json = json.loads(text)
|
livechat_json = json.loads(text)
|
||||||
break
|
break
|
||||||
except json.JSONDecodeError :
|
except json.JSONDecodeError:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
logger.error(f"[{self.video_id}]"
|
self._logger.error(f"[{self.video_id}]"
|
||||||
f"Exceeded retry count. status_code={status_code}")
|
f"Exceeded retry count. status_code={status_code}")
|
||||||
return None
|
return None
|
||||||
return livechat_json
|
return livechat_json
|
||||||
|
|
||||||
def _callback_loop(self,callback):
|
def _callback_loop(self, callback):
|
||||||
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
|
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
|
||||||
callbackに指定された関数に一定間隔でチャットデータを投げる。
|
callbackに指定された関数に一定間隔でチャットデータを投げる。
|
||||||
|
|
||||||
Parameter
|
Parameter
|
||||||
---------
|
---------
|
||||||
callback : func
|
callback : func
|
||||||
@@ -258,19 +264,22 @@ class LiveChat:
|
|||||||
"""
|
"""
|
||||||
while self.is_alive():
|
while self.is_alive():
|
||||||
items = self._buffer.get()
|
items = self._buffer.get()
|
||||||
data = self.processor.process(items)
|
processed_chat = self.processor.process(items)
|
||||||
callback(data)
|
if isinstance(processed_chat, tuple):
|
||||||
|
self._callback(*processed_chat)
|
||||||
|
else:
|
||||||
|
self._callback(processed_chat)
|
||||||
|
|
||||||
def get(self):
|
def get(self):
|
||||||
""" bufferからデータを取り出し、processorに投げ、
|
""" bufferからデータを取り出し、processorに投げ、
|
||||||
加工済みのチャットデータを返す。
|
加工済みのチャットデータを返す。
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
: Processorによって加工されたチャットデータ
|
: Processorによって加工されたチャットデータ
|
||||||
"""
|
"""
|
||||||
if self._callback is None:
|
if self._callback is None:
|
||||||
items = self._buffer.get()
|
items = self._buffer.get()
|
||||||
return self.processor.process(items)
|
return self.processor.process(items)
|
||||||
raise IllegalFunctionCall(
|
raise IllegalFunctionCall(
|
||||||
"既にcallbackを登録済みのため、get()は実行できません。")
|
"既にcallbackを登録済みのため、get()は実行できません。")
|
||||||
|
|
||||||
@@ -288,29 +297,22 @@ class LiveChat:
|
|||||||
return
|
return
|
||||||
if self._pauser.empty():
|
if self._pauser.empty():
|
||||||
self._pauser.put_nowait(None)
|
self._pauser.put_nowait(None)
|
||||||
|
|
||||||
def is_alive(self):
|
def is_alive(self):
|
||||||
return self._is_alive
|
return self._is_alive
|
||||||
|
|
||||||
def finish(self,sender):
|
def finish(self, sender):
|
||||||
'''Listener終了時のコールバック'''
|
'''Listener終了時のコールバック'''
|
||||||
try:
|
try:
|
||||||
self.terminate()
|
self.terminate()
|
||||||
except CancelledError:
|
except CancelledError:
|
||||||
logger.debug(f'[{self.video_id}]cancelled:{sender}')
|
self._logger.debug(f'[{self.video_id}]cancelled:{sender}')
|
||||||
|
|
||||||
def terminate(self):
|
def terminate(self):
|
||||||
'''
|
'''
|
||||||
Listenerを終了する。
|
Listenerを終了する。
|
||||||
'''
|
'''
|
||||||
self._is_alive = False
|
if self.is_alive():
|
||||||
if self._direct_mode == False:
|
self._is_alive = False
|
||||||
#bufferにダミーオブジェクトを入れてis_alive()を判定させる
|
self._buffer.put({})
|
||||||
self._buffer.put({'chatdata':'','timeout':0})
|
self._logger.info(f'[{self.video_id}]終了しました')
|
||||||
logger.info(f'[{self.video_id}]finished.')
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def shutdown(cls, event, sig = None, handler=None):
|
|
||||||
logger.debug("shutdown...")
|
|
||||||
for t in LiveChat._listeners:
|
|
||||||
t._is_alive = False
|
|
||||||
|
|||||||
@@ -1,309 +0,0 @@
|
|||||||
import requests
|
|
||||||
import datetime
|
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import signal
|
|
||||||
import time
|
|
||||||
import traceback
|
|
||||||
import urllib.parse
|
|
||||||
import warnings
|
|
||||||
from concurrent.futures import CancelledError, ThreadPoolExecutor
|
|
||||||
from queue import Queue
|
|
||||||
from .buffer import Buffer
|
|
||||||
from ..parser.replay import Parser
|
|
||||||
from .. import config
|
|
||||||
from ..exceptions import ChatParseException,IllegalFunctionCall
|
|
||||||
from ..paramgen import arcparam
|
|
||||||
from ..processors.default.processor import DefaultProcessor
|
|
||||||
from ..processors.combinator import Combinator
|
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
headers = config.headers
|
|
||||||
MAX_RETRY = 10
|
|
||||||
|
|
||||||
|
|
||||||
class ReplayChat:
    '''
    ### -----------------------------------------------------------
    ### [Warning] ReplayChat is integrated into LiveChat.
    ### This class is deprecated and will be removed at v0.0.5.0.
    ### -----------------------------------------------------------

    Fetch the chat data of a YouTube live stream using a thread pool.

    Parameter
    ---------
    video_id : str
        ID of the video.

    seektime : int
        Start position (seconds) of the chat data to replay.

    processor : ChatProcessor
        Object that processes the chat data. A tuple of processors is
        wrapped in a Combinator. When omitted, a new DefaultProcessor
        is created for this instance.

    buffer : Buffer(maxsize:20[default])
        Buffer that stores chat_component items.
        maxsize : number of chat_components that can be stored.
        Defaults to 20; one component covers roughly 5-10 seconds.

    interruptable : bool
        Whether processing can be interrupted with Ctrl+C.

    callback : func
        Function called automatically at intervals with processed
        chat data.

    done_callback : func
        Callback invoked when the listener finishes.

    direct_mode : bool
        If True, callback is called without using the buffer.
        callback is mandatory in that case
        (IllegalFunctionCall is raised when it is missing).

    Attributes
    ---------
    _executor : ThreadPoolExecutor
        Runs the chat fetching loop (_listen).

    _is_alive : bool
        Flag used to stop fetching chat.
    '''

    _setup_finished = False

    # Listeners that are currently monitoring chat.
    _listeners = []

    def __init__(self, video_id,
                 seektime=0,
                 processor=None,
                 buffer=None,
                 interruptable=True,
                 callback=None,
                 done_callback=None,
                 direct_mode=False
                 ):
        warnings.warn(""
            f"\n{'-'*60}\n[WARNING] ReplayChat is integrated into LiveChat.\n"
            f"{' '*5}This is deprecated and will be removed at v0.0.5.0.\n"
            f"{'-'*60}\n"
        )
        self.video_id = video_id
        self.seektime = seektime
        # Build the default processor per instance instead of once at
        # definition time, so instances never share one processor object.
        if processor is None:
            processor = DefaultProcessor()
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
        else:
            self.processor = processor
        self._buffer = buffer
        self._callback = callback
        self._done_callback = done_callback
        self._executor = ThreadPoolExecutor(max_workers=2)
        self._direct_mode = direct_mode
        self._is_alive = True
        self._parser = Parser()
        # A queue holding one token means "running"; an empty queue
        # means "paused" (see pause() / resume()).
        self._pauser = Queue()
        self._pauser.put_nowait(None)

        self._setup()

        # The SIGINT handler is installed once, by the first instance.
        if not ReplayChat._setup_finished:
            ReplayChat._setup_finished = True
            if interruptable:
                signal.signal(signal.SIGINT, (lambda a, b:
                    (ReplayChat.shutdown(None, signal.SIGINT, b))
                ))
        ReplayChat._listeners.append(self)

    def _setup(self):
        '''Validate the mode settings and start the background tasks.'''
        if self._direct_mode:
            # direct mode without a callback is an error.
            if self._callback is None:
                raise IllegalFunctionCall(
                    "direct_mode=Trueの場合callbackの設定が必須です。")
        else:
            # Create the default buffer when none was supplied.
            if self._buffer is None:
                self._buffer = Buffer(maxsize=20)
            # When a callback is given, start the loop task that feeds it.
            if self._callback is not None:
                self._executor.submit(self._callback_loop, self._callback)
        # Start the _listen loop task.
        listen_task = self._executor.submit(self._startlisten)
        # Register the completion callback.
        if self._done_callback is None:
            listen_task.add_done_callback(self.finish)
        else:
            listen_task.add_done_callback(self._done_callback)

    def _startlisten(self):
        """Fetch the first continuation parameter and
        run the _listen loop with it.
        """
        initial_continuation = self._get_initial_continuation()
        if initial_continuation is None:
            self.terminate()
            logger.debug(f"[{self.video_id}]No initial continuation.")
            return
        self._listen(initial_continuation)

    def _get_initial_continuation(self):
        ''' Get the first continuation needed to fetch chat data.

        Returns None (after terminating the listener) when the
        parameter cannot be generated.
        '''
        try:
            initial_continuation = arcparam.getparam(self.video_id, self.seektime)
        except ChatParseException as e:
            self.terminate()
            logger.debug(f"[{self.video_id}]Error:{str(e)}")
            return
        except KeyError:
            logger.debug(f"[{self.video_id}]KeyError:"
                         f"{traceback.format_exc(limit = -1)}")
            self.terminate()
            return
        return initial_continuation

    def _listen(self, continuation):
        ''' Fetch the chat data tied to the continuation,
        store it in the buffer (or pass it to the callback in direct
        mode), then take the next continuation and loop.

        Parameter
        ---------
        continuation : str
            parameter required to fetch the next chat data
        '''
        try:
            with requests.Session() as session:
                while(continuation and self._is_alive):
                    if self._pauser.empty():
                        # paused: block until resume() puts a token.
                        self._pauser.get()
                        # resumed: put the token back so that the next
                        # iteration does not block.
                        self._pauser.put_nowait(None)
                    livechat_json = (
                        self._get_livechat_json(continuation, session, headers)
                    )
                    metadata, chatdata = self._parser.parse(livechat_json)
                    timeout = metadata['timeoutMs']/1000
                    chat_component = {
                        "video_id": self.video_id,
                        "timeout": timeout,
                        "chatdata": chatdata
                    }
                    time_mark = time.time()
                    if self._direct_mode:
                        self._callback(
                            self.processor.process([chat_component])
                        )
                    else:
                        self._buffer.put(chat_component)
                    # Sleep for what is left of the timeout after the
                    # time spent delivering the component.
                    diff_time = max(0, timeout - (time.time()-time_mark))
                    time.sleep(diff_time)
                    continuation = metadata.get('continuation')
        except ChatParseException as e:
            self.terminate()
            logger.error(f"{str(e)}(video_id:\"{self.video_id}\")")
            return
        except (TypeError, json.JSONDecodeError):
            self.terminate()
            logger.error(f"{traceback.format_exc(limit = -1)}")
            return

        logger.debug(f"[{self.video_id}]チャット取得を終了しました。")

    def _get_livechat_json(self, continuation, session, headers):
        '''
        Get the JSON data that contains the chat data.

        Retries up to MAX_RETRY times when the response is not valid
        JSON; on exhaustion terminates the listener and returns None.
        '''
        continuation = urllib.parse.quote(continuation)
        livechat_json = None
        status_code = 0
        url = (
            f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?"
            f"continuation={continuation}&pbj=1")
        for _ in range(MAX_RETRY + 1):
            with session.get(url, headers=headers) as resp:
                try:
                    text = resp.text
                    status_code = resp.status_code
                    livechat_json = json.loads(text)
                    break
                except json.JSONDecodeError:
                    time.sleep(1)
                    continue
        else:
            # The loop finished without a successful decode.
            logger.error(f"[{self.video_id}]"
                         f"Exceeded retry count. status_code={status_code}")
            self.terminate()
            return None
        return livechat_json

    def _callback_loop(self, callback):
        """ When a callback is given to the constructor, pass processed
        chat data to it in the background for as long as the listener
        is alive.

        Parameter
        ---------
        callback : func
            function that receives the processed chat data.
        """
        while self.is_alive():
            items = self._buffer.get()
            data = self.processor.process(items)
            callback(data)

    def get(self):
        """ Take data out of the buffer, hand it to the processor and
        return the processed chat data.

        Returns
             : chat data processed by the Processor

        Raises IllegalFunctionCall when a callback is registered.
        """
        if self._callback is None:
            items = self._buffer.get()
            return self.processor.process(items)
        raise IllegalFunctionCall(
            "既にcallbackを登録済みのため、get()は実行できません。")

    def pause(self):
        '''Pause the listen loop by removing the run token.'''
        if not self._pauser.empty():
            self._pauser.get()

    def resume(self):
        '''Resume the listen loop by restoring the run token.'''
        if self._pauser.empty():
            self._pauser.put_nowait(None)

    def is_alive(self):
        '''Return the flag that tells whether chat is still being fetched.'''
        return self._is_alive

    def finish(self, sender):
        '''Callback run when the listener finishes.'''
        try:
            self.terminate()
        except RuntimeError:
            logger.debug(f'[{self.video_id}]cancelled:{sender}')

    def terminate(self):
        '''
        Stop the listener.
        '''
        self._is_alive = False
        if not self._direct_mode:
            # Put a dummy object into the buffer so that a consumer
            # blocked on the buffer wakes up and re-checks is_alive().
            self._buffer.put({'chatdata': '', 'timeout': 1})
        logger.info(f'[{self.video_id}]終了しました')

    @classmethod
    def shutdown(cls, event, sig=None, handler=None):
        '''Mark every registered listener as no longer alive.'''
        logger.debug("シャットダウンしています")
        for t in ReplayChat._listeners:
            t._is_alive = False
|
|
||||||
@@ -1,43 +1,52 @@
|
|||||||
class ChatParseException(Exception):
    """Base class of the exceptions thrown while parsing chat data."""
|
||||||
|
|
||||||
class NoYtinitialdataException(ChatParseException):
    """Thrown when the video is not found.

    (The chat data url could not be found in the stream page.)
    """
|
||||||
|
|
||||||
class ResponseContextError(ChatParseException):
    """Thrown when the chat data of the stream page is invalid."""
|
||||||
|
|
||||||
class NoLivechatRendererException(ChatParseException):
    """Thrown when livechatRenderer is missing in the chat data JSON."""
|
||||||
|
|
||||||
|
|
||||||
class NoContentsException(ChatParseException):
    """Thrown when ContinuationContents is missing in the chat data JSON."""
|
||||||
|
|
||||||
class NoContinuationsException(ChatParseException):
    """Thrown when continuation is missing in ContinuationContents."""
|
||||||
|
|
||||||
class IllegalFunctionCall(Exception):
    """Thrown when get() is called even though
    set_callback() has been executed.
    """
|
||||||
|
|
||||||
|
class InvalidVideoIdException(Exception):
    """Thrown when the video_id does not exist (VideoInfo)."""
|
||||||
|
|
||||||
|
class UnknownConnectionError(Exception):
    # NOTE(review): undocumented in the original and no raise site is
    # visible here; presumably signals an unclassified connection
    # failure - confirm against the callers.
    pass
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
import logging
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
|
|
||||||
def get_logger(modname, mode=logging.DEBUG):
    """Return the logger named *modname*, configured for *mode*.

    Parameter
    ---------
    modname : str
        name passed to logging.getLogger().
    mode : int or None
        logging level. When None, only a NullHandler is attached and
        the logger is returned without any further configuration.

    Return
    ---------
    logging.Logger
        A stream handler using MyFormatter is attached at level *mode*.
        When *mode* is DEBUG or lower, a file handler writing records of
        level ERROR and above to "log.txt" is attached as well.

    Handlers are attached only once per logger: calling this function
    again for the same name does not stack duplicate handlers (which
    would emit every record several times).
    """
    logger = logging.getLogger(modname)
    if mode is None:
        if not any(isinstance(h, logging.NullHandler)
                   for h in logger.handlers):
            logger.addHandler(logging.NullHandler())
        return logger
    logger.setLevel(mode)
    # Already configured by an earlier call: keep the existing handlers.
    if any(isinstance(h.formatter, MyFormatter) for h in logger.handlers):
        return logger
    my_formatter = MyFormatter()
    # handler1: shows records on the stream.
    handler1 = logging.StreamHandler()
    handler1.setFormatter(my_formatter)
    handler1.setLevel(mode)
    logger.addHandler(handler1)
    # handler2: records errors to the log file.
    if mode <= logging.DEBUG:
        handler2 = logging.FileHandler(filename="log.txt", encoding='utf-8')
        handler2.setLevel(logging.ERROR)
        handler2.setFormatter(my_formatter)
        logger.addHandler(handler2)
    return logger
|
|
||||||
|
|
||||||
class MyFormatter(logging.Formatter):
    """Formatter that renders a record as
    "<MM-DD HH:MM:SS>| <module> { <function>:<line>} - <message>".
    """

    def format(self, record):
        """Build the log line for *record* from fixed-width columns."""
        stamp = datetime.datetime.fromtimestamp(
            record.created).strftime("%m-%d %H:%M:%S")
        module_col = record.module.ljust(15)
        func_col = (' { ' + record.funcName).ljust(20)
        line_col = str(record.lineno).rjust(4)
        return (f"{stamp}| {module_col}{func_col}:{line_col}"
                f"}} - {record.getMessage()}")
|
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
from base64 import urlsafe_b64encode as b64enc
|
from base64 import urlsafe_b64encode as b64enc
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
import calendar, datetime, pytz
|
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@@ -13,6 +12,7 @@ Author: taizan-hokuto (2019) @taizan205
|
|||||||
ver 0.0.1 2019.10.05
|
ver 0.0.1 2019.10.05
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def _gen_vid(video_id):
|
def _gen_vid(video_id):
|
||||||
"""generate video_id parameter.
|
"""generate video_id parameter.
|
||||||
Parameter
|
Parameter
|
||||||
@@ -21,10 +21,10 @@ def _gen_vid(video_id):
|
|||||||
|
|
||||||
Return
|
Return
|
||||||
---------
|
---------
|
||||||
byte[] : base64 encoded video_id parameter.
|
bytes : base64 encoded video_id parameter.
|
||||||
"""
|
"""
|
||||||
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
|
header_magic = b'\x0A\x0F\x1A\x0D\x0A'
|
||||||
header_id = video_id.encode()
|
header_id = video_id.encode()
|
||||||
header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B'
|
header_sep_1 = b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B'
|
||||||
header_terminator = b'\x20\x01'
|
header_terminator = b'\x20\x01'
|
||||||
|
|
||||||
@@ -41,42 +41,46 @@ def _gen_vid(video_id):
|
|||||||
b64enc(reduce(lambda x, y: x+y, item)).decode()
|
b64enc(reduce(lambda x, y: x+y, item)).decode()
|
||||||
).encode()
|
).encode()
|
||||||
|
|
||||||
|
|
||||||
def _nval(val):
|
def _nval(val):
|
||||||
"""convert value to byte array"""
|
"""convert value to byte array"""
|
||||||
if val<0: raise ValueError
|
if val < 0:
|
||||||
|
raise ValueError
|
||||||
buf = b''
|
buf = b''
|
||||||
while val >> 7:
|
while val >> 7:
|
||||||
m = val & 0xFF | 0x80
|
m = val & 0xFF | 0x80
|
||||||
buf += m.to_bytes(1,'big')
|
buf += m.to_bytes(1, 'big')
|
||||||
val >>= 7
|
val >>= 7
|
||||||
buf += val.to_bytes(1,'big')
|
buf += val.to_bytes(1, 'big')
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
def _build(video_id, seektime, topchatonly = False):
|
|
||||||
switch_01 = b'\x04' if topchatonly else b'\x01'
|
def _build(video_id, seektime, topchat_only):
|
||||||
|
switch_01 = b'\x04' if topchat_only else b'\x01'
|
||||||
if seektime < 0:
|
if seektime < 0:
|
||||||
times =_nval(0)
|
times = _nval(0)
|
||||||
switch = b'\x04'
|
switch = b'\x04'
|
||||||
elif seektime == 0:
|
elif seektime == 0:
|
||||||
times =_nval(1)
|
times = _nval(1)
|
||||||
switch = b'\x03'
|
switch = b'\x03'
|
||||||
else:
|
else:
|
||||||
times =_nval(int(seektime*1000000))
|
times = _nval(int(seektime*1000000))
|
||||||
switch = b'\x03'
|
switch = b'\x03'
|
||||||
parity = b'\x00'
|
parity = b'\x00'
|
||||||
|
|
||||||
header_magic= b'\xA2\x9D\xB0\xD3\x04'
|
header_magic = b'\xA2\x9D\xB0\xD3\x04'
|
||||||
sep_0 = b'\x1A'
|
sep_0 = b'\x1A'
|
||||||
vid = _gen_vid(video_id)
|
vid = _gen_vid(video_id)
|
||||||
time_tag = b'\x28'
|
time_tag = b'\x28'
|
||||||
timestamp1 = times
|
timestamp1 = times
|
||||||
sep_1 = b'\x30\x00\x38\x00\x40\x00\x48'
|
sep_1 = b'\x30\x00\x38\x00\x40\x00\x48'
|
||||||
sep_2 = b'\x52\x1C\x08\x00\x10\x00\x18\x00\x20\x00'
|
sep_2 = b'\x52\x1C\x08\x00\x10\x00\x18\x00\x20\x00'
|
||||||
chkstr = b'\x2A\x0E\x73\x74\x61\x74\x69\x63\x63\x68\x65\x63\x6B\x73\x75\x6D\x40'
|
chkstr = b'\x2A\x0E\x73\x74\x61\x74\x69\x63\x63\x68\x65\x63\x6B\x73\x75\x6D\x40'
|
||||||
sep_3 = b'\x00\x58\x03\x60'
|
sep_3 = b'\x00\x58\x03\x60'
|
||||||
sep_4 = b'\x68' + parity + b'\x72\x04\x08'
|
sep_4 = b'\x68' + parity + b'\x72\x04\x08'
|
||||||
sep_5 = b'\x10' + parity + b'\x78\x00'
|
sep_5 = b'\x10' + parity + b'\x78\x00'
|
||||||
body = [
|
|
||||||
|
body = b''.join([
|
||||||
sep_0,
|
sep_0,
|
||||||
_nval(len(vid)),
|
_nval(len(vid)),
|
||||||
vid,
|
vid,
|
||||||
@@ -91,23 +95,24 @@ def _build(video_id, seektime, topchatonly = False):
|
|||||||
sep_4,
|
sep_4,
|
||||||
switch_01,
|
switch_01,
|
||||||
sep_5
|
sep_5
|
||||||
]
|
])
|
||||||
|
|
||||||
body = reduce(lambda x, y: x+y, body)
|
return urllib.parse.quote(
|
||||||
|
b64enc(header_magic +
|
||||||
return urllib.parse.quote(
|
_nval(len(body)) +
|
||||||
b64enc( header_magic +
|
body
|
||||||
_nval(len(body)) +
|
).decode()
|
||||||
body
|
)
|
||||||
).decode()
|
|
||||||
)
|
|
||||||
|
|
||||||
def getparam(video_id, seektime=0, topchat_only=False):
    '''
    Build the continuation parameter for the given video.

    Parameter
    ---------
    video_id : str
    seektime : int
        unit:seconds
        start position of fetching chat data.
    topchat_only : bool
        if True, fetch only 'top chat'

    Return
    ---------
    str : value produced by _build().
    '''
    continuation = _build(video_id, seektime, topchat_only)
    return continuation
|
||||||
|
|||||||
133
pytchat/paramgen/arcparam_mining.py
Normal file
133
pytchat/paramgen/arcparam_mining.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
from base64 import urlsafe_b64encode as b64enc
|
||||||
|
from functools import reduce
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
'''
|
||||||
|
Generate continuation parameter of youtube replay chat.
|
||||||
|
|
||||||
|
Author: taizan-hokuto (2019) @taizan205
|
||||||
|
|
||||||
|
ver 0.0.1 2019.10.05
|
||||||
|
'''
|
||||||
|
|
||||||
|
def _gen_vid_long(video_id):
    """generate video_id parameter.

    The encoded id appears twice in the payload: once after the header
    and once after the separator.

    Parameter
    ---------
    video_id : str

    Return
    ---------
    bytes : url-quoted, base64 encoded video_id parameter.
    """
    encoded_id = video_id.encode()
    payload = b''.join((
        b'\x0A\x0F\x1A\x0D\x0A',                        # header magic
        _nval(len(encoded_id)),
        encoded_id,
        b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B',    # separator
        encoded_id,
        b'\x20\x01',                                    # terminator
    ))
    quoted = urllib.parse.quote(b64enc(payload).decode())
    return quoted.encode()
|
||||||
|
|
||||||
|
def _gen_vid(video_id):
    """generate video_id parameter.

    Parameter
    ---------
    video_id : str

    Return
    ---------
    bytes : url-quoted, base64 encoded video_id parameter.
    """
    encoded_id = video_id.encode()
    payload = b''.join((
        b'\x0A\x0F\x1A\x0D\x0A',    # header magic
        _nval(len(encoded_id)),
        encoded_id,
        b'\x20\x01',                # terminator
    ))
    quoted = urllib.parse.quote(b64enc(payload).decode())
    return quoted.encode()
|
||||||
|
|
||||||
|
def _nval(val):
|
||||||
|
"""convert value to byte array"""
|
||||||
|
if val<0: raise ValueError
|
||||||
|
buf = b''
|
||||||
|
while val >> 7:
|
||||||
|
m = val & 0xFF | 0x80
|
||||||
|
buf += m.to_bytes(1,'big')
|
||||||
|
val >>= 7
|
||||||
|
buf += val.to_bytes(1,'big')
|
||||||
|
return buf
|
||||||
|
|
||||||
|
def _build(video_id, seektime, topchat_only):
    """Assemble the continuation parameter.

    Parameter
    ---------
    video_id : str
    seektime : number
        start position in seconds; encoded in milliseconds and left
        out of the payload entirely when it is zero.
    topchat_only : bool
        selects the switch byte (0x04 when True, 0x01 otherwise).

    Return
    ---------
    str : url-quoted, base64 encoded parameter.

    Raises ValueError when seektime is negative.
    """
    switch_01 = b'\x04' if topchat_only else b'\x01'
    if seektime < 0:
        raise ValueError("seektime must be greater than or equal to zero.")
    if seektime == 0:
        # no seek position: neither the time field nor its header is sent
        timestamp1 = b''
        time_header = b''
    else:
        timestamp1 = _nval(int(seektime*1000))
        time_header = (b'\x5A'
                       + (len(timestamp1)+1).to_bytes(1, 'big')
                       + b'\x10')

    header_magic = b'\xA2\x9D\xB0\xD3\x04'
    vid = _gen_vid(video_id)

    body = b''.join((
        b'\x1A',                    # sep_0
        _nval(len(vid)),
        vid,
        b'\x40\x01',                # tag
        time_header,
        timestamp1,
        b'\x60\x04\x72\x02\x08',    # sep_1
        switch_01,
        b'\x78\x01',                # terminator
    ))

    packed = header_magic + _nval(len(body)) + body
    return urllib.parse.quote(b64enc(packed).decode())
|
||||||
|
|
||||||
|
def getparam(video_id, seektime=0.0, topchat_only=False):
    '''
    Build the continuation parameter for the given video.

    Parameter
    ---------
    video_id : str
    seektime : number
        unit:seconds
        start position of fetching chat data.
    topchat_only : bool
        if True, fetch only 'top chat'

    Return
    ---------
    str : value produced by _build().
    '''
    continuation = _build(video_id, seektime, topchat_only)
    return continuation
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from base64 import urlsafe_b64encode as b64enc
|
from base64 import urlsafe_b64encode as b64enc
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
import calendar, datetime, pytz
|
import time
|
||||||
import random
|
import random
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
@@ -11,6 +11,8 @@ Author: taizan-hokuto (2019) @taizan205
|
|||||||
|
|
||||||
ver 0.0.1 2019.10.05
|
ver 0.0.1 2019.10.05
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def _gen_vid(video_id):
|
def _gen_vid(video_id):
|
||||||
"""generate video_id parameter.
|
"""generate video_id parameter.
|
||||||
Parameter
|
Parameter
|
||||||
@@ -22,11 +24,11 @@ def _gen_vid(video_id):
|
|||||||
byte[] : base64 encoded video_id parameter.
|
byte[] : base64 encoded video_id parameter.
|
||||||
"""
|
"""
|
||||||
header_magic = b'\x0A\x0F\x0A\x0D\x0A'
|
header_magic = b'\x0A\x0F\x0A\x0D\x0A'
|
||||||
header_id = video_id.encode()
|
header_id = video_id.encode()
|
||||||
header_sep_1 = b'\x1A'
|
header_sep_1 = b'\x1A'
|
||||||
header_sep_2 = b'\x43\xAA\xB9\xC1\xBD\x01\x3D\x0A'
|
header_sep_2 = b'\x43\xAA\xB9\xC1\xBD\x01\x3D\x0A'
|
||||||
header_suburl = ('https://www.youtube.com/live_chat?v='
|
header_suburl = ('https://www.youtube.com/live_chat?v='
|
||||||
f'{video_id}&is_popout=1').encode()
|
f'{video_id}&is_popout=1').encode()
|
||||||
header_terminator = b'\x20\x02'
|
header_terminator = b'\x20\x02'
|
||||||
|
|
||||||
item = [
|
item = [
|
||||||
@@ -44,62 +46,66 @@ def _gen_vid(video_id):
|
|||||||
b64enc(reduce(lambda x, y: x+y, item)).decode()
|
b64enc(reduce(lambda x, y: x+y, item)).decode()
|
||||||
).encode()
|
).encode()
|
||||||
|
|
||||||
def _tzparity(video_id,times):
|
|
||||||
t=0
|
def _tzparity(video_id, times):
|
||||||
for i,s in enumerate(video_id):
|
t = 0
|
||||||
|
for i, s in enumerate(video_id):
|
||||||
ss = ord(s)
|
ss = ord(s)
|
||||||
if(ss % 2 == 0):
|
if(ss % 2 == 0):
|
||||||
t += ss*(12-i)
|
t += ss*(12-i)
|
||||||
else:
|
else:
|
||||||
t ^= ss*i
|
t ^= ss*i
|
||||||
|
|
||||||
return ((times^t) % 2).to_bytes(1,'big')
|
return ((times ^ t) % 2).to_bytes(1, 'big')
|
||||||
|
|
||||||
|
|
||||||
def _nval(val):
|
def _nval(val):
|
||||||
"""convert value to byte array"""
|
"""convert value to byte array"""
|
||||||
if val<0: raise ValueError
|
if val < 0:
|
||||||
|
raise ValueError
|
||||||
buf = b''
|
buf = b''
|
||||||
while val >> 7:
|
while val >> 7:
|
||||||
m = val & 0xFF | 0x80
|
m = val & 0xFF | 0x80
|
||||||
buf += m.to_bytes(1,'big')
|
buf += m.to_bytes(1, 'big')
|
||||||
val >>= 7
|
val >>= 7
|
||||||
buf += val.to_bytes(1,'big')
|
buf += val.to_bytes(1, 'big')
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
def _build(video_id, _ts1, _ts2, _ts3, _ts4, _ts5, topchatonly = False):
|
|
||||||
#_short_type2
|
|
||||||
switch_01 = b'\x04' if topchatonly else b'\x01'
|
|
||||||
parity = _tzparity(video_id, _ts1^_ts2^_ts3^_ts4^_ts5)
|
|
||||||
|
|
||||||
header_magic= b'\xD2\x87\xCC\xC8\x03'
|
def _build(video_id, _ts1, _ts2, _ts3, _ts4, _ts5, topchat_only):
|
||||||
sep_0 = b'\x1A'
|
# _short_type2
|
||||||
vid = _gen_vid(video_id)
|
switch_01 = b'\x04' if topchat_only else b'\x01'
|
||||||
time_tag = b'\x28'
|
parity = _tzparity(video_id, _ts1 ^ _ts2 ^ _ts3 ^ _ts4 ^ _ts5)
|
||||||
timestamp1 = _nval(_ts1)
|
|
||||||
sep_1 = b'\x30\x00\x38\x00\x40\x02\x4A'
|
|
||||||
un_len = b'\x2B'
|
|
||||||
sep_2 = b'\x08'+parity+b'\x10\x00\x18\x00\x20\x00'
|
|
||||||
chkstr = b'\x2A\x0E\x73\x74\x61\x74\x69\x63\x63\x68\x65\x63\x6B\x73\x75\x6D'
|
|
||||||
sep_3 = b'\x3A\x00\x40\x00\x4A'
|
|
||||||
sep_4_len = b'\x02'
|
|
||||||
sep_4 = b'\x08\x01'
|
|
||||||
ts_2_start = b'\x50'
|
|
||||||
timestamp2 = _nval(_ts2)
|
|
||||||
ts_2_end = b'\x58'
|
|
||||||
sep_5 = b'\x03'
|
|
||||||
ts_3_start = b'\x50'
|
|
||||||
timestamp3 = _nval(_ts3)
|
|
||||||
ts_3_end = b'\x58'
|
|
||||||
timestamp4 = _nval(_ts4)
|
|
||||||
sep_6 = b'\x68'
|
|
||||||
#switch
|
|
||||||
sep_7 = b'\x82\x01\x04\x08'
|
|
||||||
#switch
|
|
||||||
sep_8 = b'\x10\x00'
|
|
||||||
sep_9 = b'\x88\x01\x00\xA0\x01'
|
|
||||||
timestamp5 = _nval(_ts5)
|
|
||||||
|
|
||||||
body = [
|
header_magic = b'\xD2\x87\xCC\xC8\x03'
|
||||||
|
sep_0 = b'\x1A'
|
||||||
|
vid = _gen_vid(video_id)
|
||||||
|
time_tag = b'\x28'
|
||||||
|
timestamp1 = _nval(_ts1)
|
||||||
|
sep_1 = b'\x30\x00\x38\x00\x40\x02\x4A'
|
||||||
|
un_len = b'\x2B'
|
||||||
|
sep_2 = b'\x08'+parity+b'\x10\x00\x18\x00\x20\x00'
|
||||||
|
chkstr = b'\x2A\x0E\x73\x74\x61\x74\x69\x63\x63\x68\x65\x63\x6B\x73\x75\x6D'
|
||||||
|
sep_3 = b'\x3A\x00\x40\x00\x4A'
|
||||||
|
sep_4_len = b'\x02'
|
||||||
|
sep_4 = b'\x08\x01'
|
||||||
|
ts_2_start = b'\x50'
|
||||||
|
timestamp2 = _nval(_ts2)
|
||||||
|
ts_2_end = b'\x58'
|
||||||
|
sep_5 = b'\x03'
|
||||||
|
ts_3_start = b'\x50'
|
||||||
|
timestamp3 = _nval(_ts3)
|
||||||
|
ts_3_end = b'\x58'
|
||||||
|
timestamp4 = _nval(_ts4)
|
||||||
|
sep_6 = b'\x68'
|
||||||
|
# switch
|
||||||
|
sep_7 = b'\x82\x01\x04\x08'
|
||||||
|
# switch
|
||||||
|
sep_8 = b'\x10\x00'
|
||||||
|
sep_9 = b'\x88\x01\x00\xA0\x01'
|
||||||
|
timestamp5 = _nval(_ts5)
|
||||||
|
|
||||||
|
body = b''.join([
|
||||||
sep_0,
|
sep_0,
|
||||||
_nval(len(vid)),
|
_nval(len(vid)),
|
||||||
vid,
|
vid,
|
||||||
@@ -121,46 +127,41 @@ def _build(video_id, _ts1, _ts2, _ts3, _ts4, _ts5, topchatonly = False):
|
|||||||
ts_3_end,
|
ts_3_end,
|
||||||
timestamp4,
|
timestamp4,
|
||||||
sep_6,
|
sep_6,
|
||||||
switch_01,#
|
switch_01,
|
||||||
sep_7,
|
sep_7,
|
||||||
switch_01,#
|
switch_01,
|
||||||
sep_8,
|
sep_8,
|
||||||
sep_9,
|
sep_9,
|
||||||
timestamp5
|
timestamp5
|
||||||
]
|
])
|
||||||
|
|
||||||
|
return urllib.parse.quote(
|
||||||
|
b64enc(header_magic +
|
||||||
|
_nval(len(body)) +
|
||||||
|
body
|
||||||
|
).decode()
|
||||||
|
)
|
||||||
|
|
||||||
body = reduce(lambda x, y: x+y, body)
|
|
||||||
|
|
||||||
return urllib.parse.quote(
|
|
||||||
b64enc( header_magic +
|
|
||||||
_nval(len(body)) +
|
|
||||||
body
|
|
||||||
).decode()
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _times(past_sec):
|
def _times(past_sec):
|
||||||
|
|
||||||
def unixts_now():
|
|
||||||
now = datetime.datetime.now(pytz.utc)
|
|
||||||
return calendar.timegm(now.utctimetuple())
|
|
||||||
|
|
||||||
n = unixts_now()
|
n = int(time.time())
|
||||||
|
|
||||||
_ts1= n - random.uniform(0,1*3)
|
_ts1 = n - random.uniform(0, 1*3)
|
||||||
_ts2= n - random.uniform(0.01,0.99)
|
_ts2 = n - random.uniform(0.01, 0.99)
|
||||||
_ts3= n - past_sec + random.uniform(0,1)
|
_ts3 = n - past_sec + random.uniform(0, 1)
|
||||||
_ts4= n - random.uniform(10*60,60*60)
|
_ts4 = n - random.uniform(10*60, 60*60)
|
||||||
_ts5= n - random.uniform(0.01,0.99)
|
_ts5 = n - random.uniform(0.01, 0.99)
|
||||||
return list(map(lambda x:int(x*1000000),[_ts1,_ts2,_ts3,_ts4,_ts5]))
|
return list(map(lambda x: int(x*1000000), [_ts1, _ts2, _ts3, _ts4, _ts5]))
|
||||||
|
|
||||||
|
|
||||||
def getparam(video_id, past_sec=0, topchat_only=False):
    '''
    Build the continuation parameter for the given video.

    Parameter
    ---------
    video_id : str
    past_sec : int
        seconds to load past chat data
    topchat_only : bool
        if True, fetch only 'top chat'

    Return
    ---------
    value produced by _build().
    '''
    ts1, ts2, ts3, ts4, ts5 = _times(past_sec)
    return _build(video_id, ts1, ts2, ts3, ts4, ts5, topchat_only)
|
||||||
|
|
||||||
|
|||||||
@@ -5,16 +5,12 @@ Parser of live chat JSON.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from .. import config
|
|
||||||
from .. exceptions import (
|
from .. exceptions import (
|
||||||
ResponseContextError,
|
ResponseContextError,
|
||||||
NoContentsException,
|
NoContentsException,
|
||||||
NoContinuationsException,
|
NoContinuationsException,
|
||||||
ChatParseException )
|
ChatParseException )
|
||||||
|
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
|
||||||
|
|
||||||
class Parser:
|
class Parser:
|
||||||
|
|
||||||
__slots__ = ['is_replay']
|
__slots__ = ['is_replay']
|
||||||
@@ -26,7 +22,8 @@ class Parser:
|
|||||||
if jsn is None:
|
if jsn is None:
|
||||||
raise ChatParseException('Called with none JSON object.')
|
raise ChatParseException('Called with none JSON object.')
|
||||||
if jsn['response']['responseContext'].get('errors'):
|
if jsn['response']['responseContext'].get('errors'):
|
||||||
raise ResponseContextError('The video_id would be wrong, or video is deleted or private.')
|
raise ResponseContextError('The video_id would be wrong,'
|
||||||
|
'or video is deleted or private.')
|
||||||
contents=jsn['response'].get('continuationContents')
|
contents=jsn['response'].get('continuationContents')
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
@@ -64,12 +61,28 @@ class Parser:
|
|||||||
raise ChatParseException('Finished chat data')
|
raise ChatParseException('Finished chat data')
|
||||||
unknown = list(cont.keys())[0]
|
unknown = list(cont.keys())[0]
|
||||||
if unknown:
|
if unknown:
|
||||||
logger.debug(f"Received unknown continuation type:{unknown}")
|
raise ChatParseException(f"Received unknown continuation type:{unknown}")
|
||||||
metadata = cont.get(unknown)
|
|
||||||
else:
|
else:
|
||||||
raise ChatParseException('Cannot extract continuation data')
|
raise ChatParseException('Cannot extract continuation data')
|
||||||
return self._create_data(metadata, contents)
|
return self._create_data(metadata, contents)
|
||||||
|
|
||||||
|
def reload_continuation(self, contents):
    """
    When `seektime = 0` or seektime is omitted,
    check whether the fetched chat json has no chat data.
    If so, try to fetch playerSeekContinuationData.
    This function must be run only on the first fetch.

    Returns None when chat data exists, otherwise the value stored
    under "continuation" in playerSeekContinuationData.
    Raises ChatParseException when neither entry is present.
    """
    first = contents['liveChatContinuation']['continuations'][0]
    if first.get("liveChatReplayContinuationData"):
        # chat data exists: nothing to reload.
        return None
    # no chat data: fall back to playerSeekContinuationData.
    seek_data = first.get("playerSeekContinuationData")
    if not seek_data:
        raise ChatParseException('Finished chat data')
    return seek_data.get("continuation")
|
||||||
|
|
||||||
def _create_data(self, metadata, contents):
|
def _create_data(self, metadata, contents):
|
||||||
actions = contents['liveChatContinuation'].get('actions')
|
actions = contents['liveChatContinuation'].get('actions')
|
||||||
if self.is_replay:
|
if self.is_replay:
|
||||||
@@ -77,7 +90,8 @@ class Parser:
|
|||||||
metadata.setdefault("timeoutMs",interval)
|
metadata.setdefault("timeoutMs",interval)
|
||||||
"""Archived chat has different structures than live chat,
|
"""Archived chat has different structures than live chat,
|
||||||
so make it the same format."""
|
so make it the same format."""
|
||||||
chatdata = [action["replayChatItemAction"]["actions"][0] for action in actions]
|
chatdata = [action["replayChatItemAction"]["actions"][0]
|
||||||
|
for action in actions]
|
||||||
else:
|
else:
|
||||||
metadata.setdefault('timeoutMs', 10000)
|
metadata.setdefault('timeoutMs', 10000)
|
||||||
chatdata = actions
|
chatdata = actions
|
||||||
|
|||||||
@@ -1,23 +1,22 @@
|
|||||||
class ChatProcessor:
    '''
    Abstract class that processes chat data.
    Receives chat data (actions) from the Listener.
    '''

    def process(self, chat_components: list):
        '''
        Interface that represents processing of chat data.
        Called from the LiveChat object.

        Parameter
        ----------
        chat_components: List[component]
            component : dict {
                "video_id" : str
                    ID of the video.
                "timeout" : int
                    Time to fetch next chat (seconds)
                "chatdata" : List[dict]
                    List of chat data.
            }
        '''
        # No processing is done here; this implementation returns None.
        return None
|
||||||
|
|||||||
@@ -4,17 +4,19 @@ from .renderer.textmessage import LiveChatTextMessageRenderer
|
|||||||
from .renderer.paidmessage import LiveChatPaidMessageRenderer
|
from .renderer.paidmessage import LiveChatPaidMessageRenderer
|
||||||
from .renderer.paidsticker import LiveChatPaidStickerRenderer
|
from .renderer.paidsticker import LiveChatPaidStickerRenderer
|
||||||
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
|
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
|
||||||
|
from .renderer.membership import LiveChatMembershipItemRenderer
|
||||||
from .. chat_processor import ChatProcessor
|
from .. chat_processor import ChatProcessor
|
||||||
from ... import config
|
from ... import config
|
||||||
logger = config.logger(__name__)
|
logger = config.logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class CompatibleProcessor(ChatProcessor):
|
class CompatibleProcessor(ChatProcessor):
|
||||||
|
|
||||||
def process(self, chat_components: list):
|
def process(self, chat_components: list):
|
||||||
|
|
||||||
chatlist = []
|
chatlist = []
|
||||||
timeout = 0
|
timeout = 0
|
||||||
ret={}
|
ret = {}
|
||||||
ret["kind"] = "youtube#liveChatMessageListResponse"
|
ret["kind"] = "youtube#liveChatMessageListResponse"
|
||||||
ret["etag"] = ""
|
ret["etag"] = ""
|
||||||
ret["nextPageToken"] = ""
|
ret["nextPageToken"] = ""
|
||||||
@@ -23,20 +25,24 @@ class CompatibleProcessor(ChatProcessor):
|
|||||||
for chat_component in chat_components:
|
for chat_component in chat_components:
|
||||||
timeout += chat_component.get('timeout', 0)
|
timeout += chat_component.get('timeout', 0)
|
||||||
chatdata = chat_component.get('chatdata')
|
chatdata = chat_component.get('chatdata')
|
||||||
|
|
||||||
if chatdata is None: break
|
if chatdata is None:
|
||||||
|
break
|
||||||
for action in chatdata:
|
for action in chatdata:
|
||||||
if action is None: continue
|
if action is None:
|
||||||
if action.get('addChatItemAction') is None: continue
|
continue
|
||||||
if action['addChatItemAction'].get('item') is None: continue
|
if action.get('addChatItemAction') is None:
|
||||||
|
continue
|
||||||
|
if action['addChatItemAction'].get('item') is None:
|
||||||
|
continue
|
||||||
|
|
||||||
chat = self.parse(action)
|
chat = self.parse(action)
|
||||||
if chat:
|
if chat:
|
||||||
chatlist.append(chat)
|
chatlist.append(chat)
|
||||||
ret["pollingIntervalMillis"] = int(timeout*1000)
|
ret["pollingIntervalMillis"] = int(timeout*1000)
|
||||||
ret["pageInfo"]={
|
ret["pageInfo"] = {
|
||||||
"totalResults":len(chatlist),
|
"totalResults": len(chatlist),
|
||||||
"resultsPerPage":len(chatlist),
|
"resultsPerPage": len(chatlist),
|
||||||
}
|
}
|
||||||
ret["items"] = chatlist
|
ret["items"] = chatlist
|
||||||
|
|
||||||
@@ -47,8 +53,9 @@ class CompatibleProcessor(ChatProcessor):
|
|||||||
action = sitem.get("addChatItemAction")
|
action = sitem.get("addChatItemAction")
|
||||||
if action:
|
if action:
|
||||||
item = action.get("item")
|
item = action.get("item")
|
||||||
if item is None: return None
|
if item is None:
|
||||||
rd={}
|
return None
|
||||||
|
rd = {}
|
||||||
try:
|
try:
|
||||||
renderer = self.get_renderer(item)
|
renderer = self.get_renderer(item)
|
||||||
if renderer == None:
|
if renderer == None:
|
||||||
@@ -57,25 +64,26 @@ class CompatibleProcessor(ChatProcessor):
|
|||||||
rd["kind"] = "youtube#liveChatMessage"
|
rd["kind"] = "youtube#liveChatMessage"
|
||||||
rd["etag"] = ""
|
rd["etag"] = ""
|
||||||
rd["id"] = 'LCC.' + renderer.get_id()
|
rd["id"] = 'LCC.' + renderer.get_id()
|
||||||
rd["snippet"] = renderer.get_snippet()
|
rd["snippet"] = renderer.get_snippet()
|
||||||
rd["authorDetails"] = renderer.get_authordetails()
|
rd["authorDetails"] = renderer.get_authordetails()
|
||||||
except (KeyError,TypeError,AttributeError) as e:
|
except (KeyError, TypeError, AttributeError) as e:
|
||||||
logger.error(f"Error: {str(type(e))}-{str(e)}")
|
logger.error(f"Error: {str(type(e))}-{str(e)}")
|
||||||
logger.error(f"item: {sitem}")
|
logger.error(f"item: {sitem}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return rd
|
return rd
|
||||||
|
|
||||||
def get_renderer(self, item):
|
def get_renderer(self, item):
|
||||||
if item.get("liveChatTextMessageRenderer"):
|
if item.get("liveChatTextMessageRenderer"):
|
||||||
renderer = LiveChatTextMessageRenderer(item)
|
renderer = LiveChatTextMessageRenderer(item)
|
||||||
elif item.get("liveChatPaidMessageRenderer"):
|
elif item.get("liveChatPaidMessageRenderer"):
|
||||||
renderer = LiveChatPaidMessageRenderer(item)
|
renderer = LiveChatPaidMessageRenderer(item)
|
||||||
elif item.get( "liveChatPaidStickerRenderer"):
|
elif item.get("liveChatPaidStickerRenderer"):
|
||||||
renderer = LiveChatPaidStickerRenderer(item)
|
renderer = LiveChatPaidStickerRenderer(item)
|
||||||
elif item.get("liveChatLegacyPaidMessageRenderer"):
|
elif item.get("liveChatLegacyPaidMessageRenderer"):
|
||||||
renderer = LiveChatLegacyPaidMessageRenderer(item)
|
renderer = LiveChatLegacyPaidMessageRenderer(item)
|
||||||
|
elif item.get("liveChatMembershipItemRenderer"):
|
||||||
|
renderer = LiveChatMembershipItemRenderer(item)
|
||||||
else:
|
else:
|
||||||
renderer = None
|
renderer = None
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
|
|||||||
40
pytchat/processors/compatible/renderer/membership.py
Normal file
40
pytchat/processors/compatible/renderer/membership.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
from .base import BaseRenderer
|
||||||
|
|
||||||
|
|
||||||
|
class LiveChatMembershipItemRenderer(BaseRenderer):
    """Renderer for ``liveChatMembershipItemRenderer`` chat items.

    Builds the ``snippet`` and ``authorDetails`` dictionaries for a
    membership (new sponsor) item.
    """

    def __init__(self, item):
        # "newSponsorEvent" is the chat type passed to the base renderer;
        # it is read back below as self.chattype (presumably stored by
        # BaseRenderer.__init__ -- confirm against base.py).
        super().__init__(item, "newSponsorEvent")

    def get_snippet(self):
        """Return the snippet dictionary for this membership item."""
        # get_message() of this class returns (text, [text]); only the
        # plain text belongs in "displayMessage".
        message, _ = self.get_message(self.renderer)
        return {
            "type": self.chattype,
            "liveChatId": "",
            "authorChannelId": self.renderer.get("authorExternalChannelId"),
            "publishedAt": self.get_publishedat(
                self.renderer.get("timestampUsec", 0)),
            "hasDisplayContent": True,
            "displayMessage": message,
        }

    def get_authordetails(self):
        """Return the author-details dictionary for this membership item.

        Raises
        ------
        KeyError, IndexError, TypeError
            If the renderer lacks the author name, a second thumbnail, or
            the external channel id.
        """
        authorExternalChannelId = self.renderer.get("authorExternalChannelId")
        # parse subscriber type
        # The sponsor flag from the badges is ignored: "isChatSponsor" is
        # always reported as True for this renderer.
        isVerified, isChatOwner, _, isChatModerator = (
            self.get_badges(self.renderer)
        )
        return {
            "channelId": authorExternalChannelId,
            "channelUrl": "http://www.youtube.com/channel/"+authorExternalChannelId,
            "displayName": self.renderer["authorName"]["simpleText"],
            "profileImageUrl": self.renderer["authorPhoto"]["thumbnails"][1]["url"],
            "isVerified": isVerified,
            "isChatOwner": isChatOwner,
            "isChatSponsor": True,
            "isChatModerator": isChatModerator
        }

    def get_message(self, renderer):
        """Join the text of ``headerSubtext`` runs.

        Returns
        -------
        tuple
            ``(message, [message])`` -- the joined text and a one-element
            list holding the same text.
        """
        message = ''.join(
            mes.get("text", "") for mes in renderer["headerSubtext"]["runs"])
        return message, [message]
|
||||||
|
|
||||||
@@ -4,15 +4,18 @@ from .renderer.textmessage import LiveChatTextMessageRenderer
|
|||||||
from .renderer.paidmessage import LiveChatPaidMessageRenderer
|
from .renderer.paidmessage import LiveChatPaidMessageRenderer
|
||||||
from .renderer.paidsticker import LiveChatPaidStickerRenderer
|
from .renderer.paidsticker import LiveChatPaidStickerRenderer
|
||||||
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
|
from .renderer.legacypaid import LiveChatLegacyPaidMessageRenderer
|
||||||
|
from .renderer.membership import LiveChatMembershipItemRenderer
|
||||||
from .. chat_processor import ChatProcessor
|
from .. chat_processor import ChatProcessor
|
||||||
from ... import config
|
from ... import config
|
||||||
|
|
||||||
logger = config.logger(__name__)
|
logger = config.logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Chatdata:
|
class Chatdata:
|
||||||
def __init__(self,chatlist:list, timeout:float):
|
def __init__(self, chatlist: list, timeout: float):
|
||||||
self.items = chatlist
|
self.items = chatlist
|
||||||
self.interval = timeout
|
self.interval = timeout
|
||||||
|
|
||||||
def tick(self):
|
def tick(self):
|
||||||
if self.interval == 0:
|
if self.interval == 0:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
@@ -25,6 +28,7 @@ class Chatdata:
|
|||||||
return
|
return
|
||||||
await asyncio.sleep(self.interval/len(self.items))
|
await asyncio.sleep(self.interval/len(self.items))
|
||||||
|
|
||||||
|
|
||||||
class DefaultProcessor(ChatProcessor):
|
class DefaultProcessor(ChatProcessor):
|
||||||
def process(self, chat_components: list):
|
def process(self, chat_components: list):
|
||||||
|
|
||||||
@@ -35,25 +39,27 @@ class DefaultProcessor(ChatProcessor):
|
|||||||
for component in chat_components:
|
for component in chat_components:
|
||||||
timeout += component.get('timeout', 0)
|
timeout += component.get('timeout', 0)
|
||||||
chatdata = component.get('chatdata')
|
chatdata = component.get('chatdata')
|
||||||
|
if chatdata is None:
|
||||||
if chatdata is None: continue
|
continue
|
||||||
for action in chatdata:
|
for action in chatdata:
|
||||||
if action is None: continue
|
if action is None:
|
||||||
if action.get('addChatItemAction') is None: continue
|
continue
|
||||||
if action['addChatItemAction'].get('item') is None: continue
|
if action.get('addChatItemAction') is None:
|
||||||
|
continue
|
||||||
|
if action['addChatItemAction'].get('item') is None:
|
||||||
|
continue
|
||||||
|
|
||||||
chat = self._parse(action)
|
chat = self._parse(action)
|
||||||
if chat:
|
if chat:
|
||||||
chatlist.append(chat)
|
chatlist.append(chat)
|
||||||
return Chatdata(chatlist, float(timeout))
|
return Chatdata(chatlist, float(timeout))
|
||||||
|
|
||||||
|
|
||||||
def _parse(self, sitem):
|
def _parse(self, sitem):
|
||||||
|
|
||||||
action = sitem.get("addChatItemAction")
|
action = sitem.get("addChatItemAction")
|
||||||
if action:
|
if action:
|
||||||
item = action.get("item")
|
item = action.get("item")
|
||||||
if item is None: return None
|
if item is None:
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
renderer = self._get_renderer(item)
|
renderer = self._get_renderer(item)
|
||||||
if renderer == None:
|
if renderer == None:
|
||||||
@@ -61,20 +67,22 @@ class DefaultProcessor(ChatProcessor):
|
|||||||
|
|
||||||
renderer.get_snippet()
|
renderer.get_snippet()
|
||||||
renderer.get_authordetails()
|
renderer.get_authordetails()
|
||||||
except (KeyError,TypeError,AttributeError) as e:
|
except (KeyError, TypeError) as e:
|
||||||
logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}")
|
logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}")
|
||||||
return None
|
return None
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
def _get_renderer(self, item):
|
def _get_renderer(self, item):
|
||||||
if item.get("liveChatTextMessageRenderer"):
|
if item.get("liveChatTextMessageRenderer"):
|
||||||
renderer = LiveChatTextMessageRenderer(item)
|
renderer = LiveChatTextMessageRenderer(item)
|
||||||
elif item.get("liveChatPaidMessageRenderer"):
|
elif item.get("liveChatPaidMessageRenderer"):
|
||||||
renderer = LiveChatPaidMessageRenderer(item)
|
renderer = LiveChatPaidMessageRenderer(item)
|
||||||
elif item.get( "liveChatPaidStickerRenderer"):
|
elif item.get("liveChatPaidStickerRenderer"):
|
||||||
renderer = LiveChatPaidStickerRenderer(item)
|
renderer = LiveChatPaidStickerRenderer(item)
|
||||||
elif item.get("liveChatLegacyPaidMessageRenderer"):
|
elif item.get("liveChatLegacyPaidMessageRenderer"):
|
||||||
renderer = LiveChatLegacyPaidMessageRenderer(item)
|
renderer = LiveChatLegacyPaidMessageRenderer(item)
|
||||||
|
elif item.get("liveChatMembershipItemRenderer"):
|
||||||
|
renderer = LiveChatMembershipItemRenderer(item)
|
||||||
else:
|
else:
|
||||||
renderer = None
|
renderer = None
|
||||||
return renderer
|
return renderer
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
class Author:
|
class Author:
|
||||||
pass
|
pass
|
||||||
class BaseRenderer:
|
class BaseRenderer:
|
||||||
@@ -60,6 +59,7 @@ class BaseRenderer:
|
|||||||
|
|
||||||
|
|
||||||
def get_badges(self,renderer):
|
def get_badges(self,renderer):
|
||||||
|
self.author.type = ''
|
||||||
isVerified = False
|
isVerified = False
|
||||||
isChatOwner = False
|
isChatOwner = False
|
||||||
isChatSponsor = False
|
isChatSponsor = False
|
||||||
@@ -67,16 +67,19 @@ class BaseRenderer:
|
|||||||
badges=renderer.get("authorBadges")
|
badges=renderer.get("authorBadges")
|
||||||
if badges:
|
if badges:
|
||||||
for badge in badges:
|
for badge in badges:
|
||||||
author_type = badge["liveChatAuthorBadgeRenderer"]["accessibility"]["accessibilityData"]["label"]
|
if badge["liveChatAuthorBadgeRenderer"].get("icon"):
|
||||||
if author_type == '確認済み':
|
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
|
||||||
isVerified = True
|
self.author.type = author_type
|
||||||
if author_type == '所有者':
|
if author_type == 'VERIFIED':
|
||||||
isChatOwner = True
|
isVerified = True
|
||||||
if 'メンバー' in author_type:
|
if author_type == 'OWNER':
|
||||||
|
isChatOwner = True
|
||||||
|
if author_type == 'MODERATOR':
|
||||||
|
isChatModerator = True
|
||||||
|
if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"):
|
||||||
isChatSponsor = True
|
isChatSponsor = True
|
||||||
|
self.author.type = 'MEMBER'
|
||||||
self.get_badgeurl(badge)
|
self.get_badgeurl(badge)
|
||||||
if author_type == 'モデレーター':
|
|
||||||
isChatModerator = True
|
|
||||||
return isVerified, isChatOwner, isChatSponsor, isChatModerator
|
return isVerified, isChatOwner, isChatSponsor, isChatModerator
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
15
pytchat/processors/default/renderer/membership.py
Normal file
15
pytchat/processors/default/renderer/membership.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from .base import BaseRenderer
|
||||||
|
|
||||||
|
|
||||||
|
class LiveChatMembershipItemRenderer(BaseRenderer):
    """Renderer for ``liveChatMembershipItemRenderer`` chat items."""

    def __init__(self, item):
        # Chat type handed to the base renderer for membership items.
        chattype = "newSponsor"
        super().__init__(item, chattype)

    def get_authordetails(self):
        """Run the base author parsing, then mark the author as a sponsor."""
        super().get_authordetails()
        self.author.isChatSponsor = True

    def get_message(self, renderer):
        """Return ``(text, [text])`` built from the ``headerSubtext`` runs."""
        runs = renderer["headerSubtext"]["runs"]
        text = ''.join(run.get("text", "") for run in runs)
        return text, [text]
|
||||||
|
|
||||||
@@ -10,13 +10,9 @@ class LiveChatPaidMessageRenderer(BaseRenderer):
|
|||||||
|
|
||||||
def get_snippet(self):
|
def get_snippet(self):
|
||||||
super().get_snippet()
|
super().get_snippet()
|
||||||
|
|
||||||
self.author.name = self.renderer["authorName"]["simpleText"]
|
|
||||||
|
|
||||||
amountDisplayString, symbol, amount =(
|
amountDisplayString, symbol, amount =(
|
||||||
self.get_amountdata(self.renderer)
|
self.get_amountdata(self.renderer)
|
||||||
)
|
)
|
||||||
self.message = self.get_message(self.renderer)
|
|
||||||
self.amountValue= amount
|
self.amountValue= amount
|
||||||
self.amountString = amountDisplayString
|
self.amountString = amountDisplayString
|
||||||
self.currency= currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
|
self.currency= currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
|
||||||
|
|||||||
@@ -10,13 +10,9 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
|
|||||||
|
|
||||||
def get_snippet(self):
|
def get_snippet(self):
|
||||||
super().get_snippet()
|
super().get_snippet()
|
||||||
|
|
||||||
self.author.name = self.renderer["authorName"]["simpleText"]
|
|
||||||
|
|
||||||
amountDisplayString, symbol, amount =(
|
amountDisplayString, symbol, amount =(
|
||||||
self.get_amountdata(self.renderer)
|
self.get_amountdata(self.renderer)
|
||||||
)
|
)
|
||||||
self.message = ""
|
|
||||||
self.amountValue = amount
|
self.amountValue = amount
|
||||||
self.amountString = amountDisplayString
|
self.amountString = amountDisplayString
|
||||||
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
|
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(symbol) else symbol
|
||||||
|
|||||||
98
pytchat/processors/html_archiver.py
Normal file
98
pytchat/processors/html_archiver.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from .chat_processor import ChatProcessor
|
||||||
|
from .default.processor import DefaultProcessor
|
||||||
|
|
||||||
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
fmt_headers = ['datetime','elapsed','authorName','message','superchat'
|
||||||
|
,'type','authorChannel']
|
||||||
|
|
||||||
|
HEADER_HTML = '''
|
||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||||
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||||
|
'''
|
||||||
|
|
||||||
|
class HTMLArchiver(ChatProcessor):
    '''
    HtmlArchiver saves chat data as HTML table format.

    Parameter
    ----------
    save_path : str :
        Path of the file to write. If it already exists, a '(number)'
        suffix is appended to the file name until an unused path is found.
    '''

    def __init__(self, save_path):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        # Write the document header, the opening <table> tag and the
        # column headers once, when the archiver is created.
        with open(self.save_path, mode='a', encoding='utf-8') as f:
            f.write(HEADER_HTML)
            f.write('<table border="1" style="border-collapse: collapse">')
            f.write(self._parse_html_header(fmt_headers))
        self.processor = DefaultProcessor()

    def _checkpath(self, filepath):
        """Return `filepath`, or a variant with a '(n)' suffix that does
        not exist yet."""
        body, extension = os.path.splitext(os.path.basename(filepath))
        dirname = os.path.dirname(filepath)
        newpath = filepath
        counter = 0
        while os.path.exists(newpath):
            match = PATTERN.search(body)
            if match:
                # Name already ends with '(n)': bump the number.
                counter = int(match[2]) + 1
                body = f'{match[1]}({counter})'
            else:
                body = f'{body}({counter})'
            newpath = os.path.join(dirname, body + extension)
        return newpath

    def process(self, chat_components: list):
        """
        Append one table row per chat item to the HTML file.

        Parameter
        ----------
        chat_components : list :
            Chat components handed to DefaultProcessor.process().
            Nothing is written when it is None or empty.

        Returns
        ----------
        None
        """
        if not chat_components:
            return

        with open(self.save_path, mode='a', encoding='utf-8') as f:
            chats = self.processor.process(chat_components).items
            f.writelines(
                self._parse_html_line([
                    c.datetime,
                    c.elapsedTime,
                    c.author.name,
                    c.message,
                    c.amountString,
                    c.author.type,
                    c.author.channelId,
                ])
                for c in chats
            )
            # Palliative treatment: the closing '</table>' tag is
            # deliberately NOT written here, to prevent the table display
            # from collapsing when further rows are appended later.

    def _parse_html_line(self, raw_line):
        """Return one '<tr>' row with a '<td>' per cell.

        Cell values come from chat messages (untrusted input), so they
        are HTML-escaped. None is written as an empty cell and other
        non-string values are converted with str().
        """
        # Local import: the module does not import `html` at file level.
        from html import escape

        cells = ''.join(
            '<td>' + escape('' if cell is None else str(cell)) + '</td>'
            for cell in raw_line
        )
        return ' <tr>' + cells + '</tr>\n'

    def _parse_html_header(self, raw_line):
        """Return the '<thead>' section with a '<th>' per header name."""
        # Local import: the module does not import `html` at file level.
        from html import escape

        cells = ''.join(
            '<th>' + escape(str(cell)) + '</th>' for cell in raw_line
        )
        return '<thead>\n' + ' <tr>' + cells + '</tr>\n' + '</thead>\n'
|
||||||
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
import json
|
|
||||||
from .chat_processor import ChatProcessor
|
|
||||||
|
|
||||||
class JsonDisplayProcessor(ChatProcessor):
    """Print each chat action as JSON, truncated to 200 characters."""

    def process(self, chat_components: list):
        """Print the 'chatdata' entries of every component to stdout."""
        if not chat_components:
            return
        for component in chat_components:
            entries = component.get('chatdata')
            if not entries:
                continue
            for entry in entries:
                dumped = json.dumps(entry, ensure_ascii=False)
                print(dumped[:200])
|
|
||||||
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
import datetime
|
|
||||||
from .chat_processor import ChatProcessor
|
|
||||||
|
|
||||||
class JsonfileArchiveProcessor(ChatProcessor):
    """Append chat actions to a text file, one JSON document per line.

    Parameter
    ----------
    filepath : str
        Destination file. If it already exists, a timestamp-named
        '.data' file in the same directory is used instead.
    """

    def __init__(self, filepath):
        super().__init__()
        if os.path.exists(filepath):
            print('filepath is already exists!: ')
            print(' '+filepath)
            # os.path.join keeps the alternate file next to the original;
            # plain concatenation with '/' pointed at the filesystem root
            # when `filepath` had no directory part.
            newpath = os.path.join(
                os.path.dirname(filepath),
                datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
                + '.data')
            print('created alternate filename:')
            print(' '+newpath)
            self.filepath = newpath
        else:
            print('filepath: '+filepath)
            self.filepath = filepath

    def process(self, chat_components: list):
        """Write every chat action of `chat_components` to the file.

        Empty components, empty actions and viewer-engagement messages
        are skipped. Returns None.
        """
        if not chat_components:
            return
        with open(self.filepath, mode='a', encoding='utf-8') as f:
            for component in chat_components:
                if not component:
                    continue
                # A component without 'chatdata' contributes nothing.
                chatdata = component.get('chatdata') or []
                for action in chatdata:
                    if not action:
                        continue
                    add_action = action.get("addChatItemAction")
                    if add_action:
                        item = add_action.get("item") or {}
                        if item.get("liveChatViewerEngagementMessageRenderer"):
                            continue
                    f.write(json.dumps(action, ensure_ascii=False) + '\n')

    def _parsedir(self, _dir):
        """Create directory `_dir` if needed and return it with a
        trailing separator."""
        separator = '' if _dir[-1] in ('\\', '/') else '/'
        os.makedirs(_dir + separator, exist_ok=True)
        return _dir + separator
|
|
||||||
|
|
||||||
66
pytchat/processors/jsonfile_archiver.py
Normal file
66
pytchat/processors/jsonfile_archiver.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from .chat_processor import ChatProcessor
|
||||||
|
|
||||||
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
|
||||||
|
class JsonfileArchiver(ChatProcessor):
    """
    JsonfileArchiver saves chat data as text of JSON lines.

    Parameter:
    ----------
    save_path : str :
        save path of file. If a file with the same name exists,
        it is automatically saved under a different name
        with suffix '(number)'
    """

    def __init__(self, save_path):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        # Total number of lines written by this instance so far.
        self.line_counter = 0

    def process(self, chat_components: list):
        """
        Append every chat action as one JSON line to the file.

        Returns
        ----------
        dict :
            save_path : str :
                Actual save path of file.
            total_lines : int :
                count of total lines written to the file.
        None :
            when `chat_components` is None.
        """
        if chat_components is None:
            return
        with open(self.save_path, mode='a', encoding='utf-8') as f:
            for component in chat_components:
                if component is None:
                    continue
                chatdata = component.get('chatdata')
                if chatdata is None:
                    continue
                for action in chatdata:
                    if action is None:
                        continue
                    # write(), not writelines(): the payload is a single
                    # string, not an iterable of lines.
                    f.write(json.dumps(action, ensure_ascii=False) + '\n')
                    self.line_counter += 1
        return {"save_path": self.save_path,
                "total_lines": self.line_counter}

    def _checkpath(self, filepath):
        """Return `filepath`, or a variant with a '(n)' suffix that does
        not exist yet."""
        body, extension = os.path.splitext(os.path.basename(filepath))
        dirname = os.path.dirname(filepath)
        newpath = filepath
        counter = 0
        while os.path.exists(newpath):
            match = PATTERN.search(body)
            if match:
                # Name already ends with '(n)': bump the number.
                counter = int(match[2]) + 1
                body = f'{match[1]}({counter})'
            else:
                body = f'{body}({counter})'
            newpath = os.path.join(dirname, body + extension)
        return newpath
|
||||||
|
|
||||||
|
|
||||||
0
pytchat/processors/speed/__init__.py
Normal file
0
pytchat/processors/speed/__init__.py
Normal file
@@ -3,8 +3,8 @@ speed_calculator.py
|
|||||||
チャットの勢いを算出するChatProcessor
|
チャットの勢いを算出するChatProcessor
|
||||||
Calculate speed of chat.
|
Calculate speed of chat.
|
||||||
"""
|
"""
|
||||||
import calendar, datetime, pytz
|
import time
|
||||||
from .chat_processor import ChatProcessor
|
from .. chat_processor import ChatProcessor
|
||||||
class RingQueue:
|
class RingQueue:
|
||||||
"""
|
"""
|
||||||
リング型キュー
|
リング型キュー
|
||||||
@@ -143,8 +143,7 @@ class SpeedCalculator(ChatProcessor, RingQueue):
|
|||||||
'''
|
'''
|
||||||
チャットデータがない場合に空のデータをキューに投入する。
|
チャットデータがない場合に空のデータをキューに投入する。
|
||||||
'''
|
'''
|
||||||
timestamp_now = calendar.timegm(datetime.datetime.
|
timestamp_now = int(time.time())
|
||||||
now(pytz.utc).utctimetuple())
|
|
||||||
self.put({
|
self.put({
|
||||||
'chat_count':0,
|
'chat_count':0,
|
||||||
'starttime':int(timestamp_now),
|
'starttime':int(timestamp_now),
|
||||||
0
pytchat/processors/superchat/__init__.py
Normal file
0
pytchat/processors/superchat/__init__.py
Normal file
74
pytchat/processors/superchat/calculator.py
Normal file
74
pytchat/processors/superchat/calculator.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import re
|
||||||
|
from pytchat.processors.chat_processor import ChatProcessor
|
||||||
|
|
||||||
|
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
|
||||||
|
|
||||||
|
items_paid = [
|
||||||
|
'addChatItemAction',
|
||||||
|
'item',
|
||||||
|
'liveChatPaidMessageRenderer'
|
||||||
|
]
|
||||||
|
|
||||||
|
items_sticker = [
|
||||||
|
'addChatItemAction',
|
||||||
|
'item',
|
||||||
|
'liveChatPaidStickerRenderer'
|
||||||
|
]
|
||||||
|
|
||||||
|
class SuperchatCalculator(ChatProcessor):
    """
    Calculate the amount of SuperChat by currency.

    Totals accumulate in `self.results` across calls to process().
    """

    def __init__(self):
        super().__init__()
        # key: currency symbol, value: total amount seen so far.
        self.results = {}

    def process(self, chat_components: list):
        """
        Add the paid-message and paid-sticker amounts found in
        `chat_components` to the running totals.

        Return
        ------------
        results : dict :
            List of amount by currency.
            key: currency symbol, value: total amount.
        """
        if chat_components is None:
            return self.results
        for component in chat_components:
            if component is None:
                continue
            chatdata = component.get('chatdata')
            if chatdata is None:
                continue
            for action in chatdata:
                renderer = (self._get_item(action, items_paid)
                            or self._get_item(action, items_sticker))
                # `or` can also yield an empty (falsy, non-None) value;
                # skip anything that is not a usable renderer.
                if not renderer:
                    continue
                symbol, amount = self._parse(renderer)
                self.results[symbol] = self.results.get(symbol, 0) + amount
        return self.results

    def _parse(self, renderer):
        """Split the purchase amount text into (symbol, amount).

        Returns ("", 0.0) when the text does not match `superchat_regex`.
        Raises KeyError when the renderer has no
        ["purchaseAmountText"]["simpleText"] entry.
        """
        purchase_amount_text = renderer["purchaseAmountText"]["simpleText"]
        m = superchat_regex.search(purchase_amount_text)
        if not m:
            return "", 0.0
        # Thousands separators are removed before converting the number.
        return m.group(1), float(m.group(2).replace(',', ''))

    def _get_item(self, dict_body, items: list):
        """Walk `dict_body` along the keys/indexes in `items`.

        Returns the value reached, or None when a step cannot be taken.
        """
        for item in items:
            if dict_body is None:
                break
            if isinstance(dict_body, dict):
                dict_body = dict_body.get(item)
                continue
            if (isinstance(item, int)
                    and isinstance(dict_body, list)
                    and len(dict_body) > item):
                dict_body = dict_body[item]
                continue
            # Neither a dict nor an indexable list position.
            return None
        return dict_body
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
70
pytchat/processors/tsv_archiver.py
Normal file
70
pytchat/processors/tsv_archiver.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from .chat_processor import ChatProcessor
|
||||||
|
from .default.processor import DefaultProcessor
|
||||||
|
|
||||||
|
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
|
||||||
|
fmt_headers = ['datetime','elapsed','authorName','message','superchatAmount'
|
||||||
|
,'authorType','authorChannel']
|
||||||
|
|
||||||
|
class TSVArchiver(ChatProcessor):
    '''
    TsvArchiver saves chat data as Tab Separated Values format text.

    Parameter
    ----------
    save_path : str :
        Path of the file to write. If it already exists, a '(number)'
        suffix is appended to the file name until an unused path is found.
    '''

    def __init__(self, save_path):
        super().__init__()
        self.save_path = self._checkpath(save_path)
        # newline='' is required by the csv module so that the writer's
        # own line terminator is not translated a second time.
        with open(self.save_path, mode='a', encoding='utf-8',
                  newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow(fmt_headers)
        self.processor = DefaultProcessor()

    def _checkpath(self, filepath):
        """Return `filepath`, or a variant with a '(n)' suffix that does
        not exist yet."""
        body, extension = os.path.splitext(os.path.basename(filepath))
        dirname = os.path.dirname(filepath)
        newpath = filepath
        counter = 0
        while os.path.exists(newpath):
            match = PATTERN.search(body)
            if match:
                # Name already ends with '(n)': bump the number.
                counter = int(match[2]) + 1
                body = f'{match[1]}({counter})'
            else:
                body = f'{body}({counter})'
            newpath = os.path.join(dirname, body + extension)
        return newpath

    def process(self, chat_components: list):
        """
        Append one TSV row per chat item to the file.

        Parameter
        ----------
        chat_components : list :
            Chat components handed to DefaultProcessor.process().
            Nothing is written when it is None or empty.

        Returns
        ----------
        None
        """
        if not chat_components:
            return

        with open(self.save_path, mode='a', encoding='utf-8',
                  newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            chats = self.processor.process(chat_components).items
            writer.writerows(
                [
                    c.datetime,
                    c.elapsedTime,
                    c.author.name,
                    c.message,
                    c.amountString,
                    c.author.type,
                    c.author.channelId,
                ]
                for c in chats
            )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
0
pytchat/tool/__init__.py
Normal file
0
pytchat/tool/__init__.py
Normal file
0
pytchat/tool/extract/__init__.py
Normal file
0
pytchat/tool/extract/__init__.py
Normal file
154
pytchat/tool/extract/asyncdl.py
Normal file
154
pytchat/tool/extract/asyncdl.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from . worker import ExtractWorker
|
||||||
|
from . patch import Patch
|
||||||
|
from ... import config
|
||||||
|
from ... paramgen import arcparam
|
||||||
|
from ... exceptions import UnknownConnectionError
|
||||||
|
from concurrent.futures import CancelledError
|
||||||
|
from json import JSONDecodeError
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
headers = config.headers
|
||||||
|
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
|
||||||
|
"get_live_chat_replay?continuation="
|
||||||
|
MAX_RETRY_COUNT = 3
|
||||||
|
|
||||||
|
def _split(start, end, count, min_interval_sec = 120):
    """
    Split section from `start` to `end` into `count` pieces,
    and returns the beginning of each piece.
    The `count` is reduced so that the length of each piece
    is no smaller than `min_interval_sec` (never below one piece).

    Parameter
    ----------
    start : int or float
    end : int or float
        must be equal to or greater than `start`.
    count : int
        requested number of pieces, 1 or more.
    min_interval_sec : int
        minimum length of a piece.

    Returns:
    --------
    List of the offset of each block's first chat data,
    sorted and without duplicates. When only one piece remains,
    the list is `[start]` (not converted to int).

    Raises:
    --------
    ValueError
        when an argument has a wrong type or is out of range.
    """
    if not isinstance(start, (int, float)) or \
            not isinstance(end, (int, float)):
        raise ValueError("start/end must be int or float")
    if not isinstance(count, int):
        raise ValueError("count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")

    span = end - start
    if span / count < min_interval_sec:
        # Too many pieces requested: use as many as fit, at least one.
        count = max(int(span / min_interval_sec), 1)
    if count == 1:
        return [start]

    interval = span / count
    # int() truncation can map two pieces to the same offset,
    # so duplicates are removed with a set before sorting.
    return sorted({int(start + interval * j) for j in range(count)})
|
||||||
|
|
||||||
|
def ready_blocks(video_id, duration, div, callback):
    """
    Fetch the initial blocks.

    The section from -1 to `duration` is divided with `_split()`, and the
    first chunk of replay chat data at each resulting seek position is
    fetched concurrently on the current event loop.

    Parameter
    ----------
    video_id : str
    duration : int
        end of the section passed to `_split()`.
        NOTE(review): presumably the video length in seconds - confirm.
    div : int
        requested number of pieces. Must be greater than 0.
    callback : func
        called as callback(actions, last - first) for every fetched
        chunk that contains chat data. Skipped when falsy.

    Returns
    ----------
    list :
        One entry per seek position: a Block, or None when the
        fetched chunk contained no chat data.

    Raises
    ----------
    ValueError
        when `div` is not greater than 0.
    UnknownConnectionError
        when a response could not be decoded as JSON
        MAX_RETRY_COUNT times in a row.
    """
    if div <= 0:
        raise ValueError("div must be greater than 0.")

    async def _get_blocks(video_id, duration, div, callback):
        async with aiohttp.ClientSession() as session:
            tasks = [_create_block(session, video_id, seektime, callback)
                     for seektime in _split(-1, duration, div)]
            return await asyncio.gather(*tasks)

    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime = seektime)
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        for _ in range(MAX_RETRY_COUNT):
            try:
                async with session.get(url, headers = headers) as resp:
                    text = await resp.text()
                next_continuation, actions = parser.parse(json.loads(text))
                break
            except JSONDecodeError:
                # Undecodable response: wait and retry.
                await asyncio.sleep(3)
        else:
            # Every retry failed: stop the other running tasks and abort.
            cancel()
            raise UnknownConnectionError("Abort: Unknown connection error.")

        if actions:
            first = parser.get_offset(actions[0])
            last = parser.get_offset(actions[-1])
            if callback:
                callback(actions, last - first)
            return Block(
                continuation = next_continuation,
                chat_data = actions,
                first = first,
                last = last
            )
        # No chat data in this chunk: implicitly returns None.

    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
    return blocks
|
||||||
|
|
||||||
|
def fetch_patch(callback, blocks, video_id):
    """
    Allocate workers and assign blocks.

    One ExtractWorker is created per block; all workers share one
    aiohttp session and run concurrently on the current event loop
    until they finish or are cancelled.

    Parameter
    ----------
    callback : func
        called as callback(actions, last - first) for every fetched
        patch that contains chat data. Skipped when falsy.
    blocks : list
        List of Block(s) handed to every worker.
    video_id : str

    Raises
    ----------
    UnknownConnectionError
        when a response could not be decoded as JSON
        MAX_RETRY_COUNT times in a row.
    """

    async def _allocate_workers():
        workers = [
            ExtractWorker(
                fetch = _fetch, block = block,
                blocks = blocks, video_id = video_id
            )
            for block in blocks
        ]
        async with aiohttp.ClientSession() as session:
            tasks = [worker.run(session) for worker in workers]
            return await asyncio.gather(*tasks)

    async def _fetch(continuation, session) -> Patch:
        url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
        for _ in range(MAX_RETRY_COUNT):
            try:
                async with session.get(url, headers = config.headers) as resp:
                    chat_json = await resp.text()
                continuation, actions = parser.parse(json.loads(chat_json))
                break
            except JSONDecodeError:
                # Undecodable response: wait and retry.
                await asyncio.sleep(3)
        else:
            # Every retry failed: stop the other running tasks and abort.
            cancel()
            raise UnknownConnectionError("Abort: Unknown connection error.")

        if actions:
            last = parser.get_offset(actions[-1])
            first = parser.get_offset(actions[0])
            if callback:
                callback(actions, last - first)
            return Patch(actions, continuation, first, last)
        return Patch(continuation = continuation)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(_allocate_workers())
    except (CancelledError, asyncio.CancelledError):
        # Since Python 3.8 asyncio.CancelledError is a separate class from
        # concurrent.futures.CancelledError, so both are listed to keep
        # swallowing the cancellation triggered by cancel().
        pass
|
||||||
|
|
||||||
|
async def _shutdown():
    """
    Cancel every task of the running loop except the calling one,
    waiting for each task to finish before cancelling the next.
    """
    print("\nshutdown...")
    others = []
    for candidate in asyncio.all_tasks():
        if candidate is not asyncio.current_task():
            others.append(candidate)
    for pending in others:
        pending.cancel()
        try:
            await pending
        except asyncio.CancelledError:
            # Expected result of the cancel() above.
            pass
|
||||||
|
|
||||||
|
def cancel():
    """
    Schedule `_shutdown()` as a task on the current event loop.
    """
    asyncio.get_event_loop().create_task(_shutdown())
|
||||||
|
|
||||||
57
pytchat/tool/extract/block.py
Normal file
57
pytchat/tool/extract/block.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from . import parser
|
||||||
|
class Block:
    """Block object represents something like a box
    to join chunk of chatdata.

    Parameter:
    ---------
    first : int :
        videoOffsetTimeMs of the first chat_data
        (chat_data[0])

    last : int :
        videoOffsetTimeMs of the last chat_data.
        (chat_data[-1])

        this value increases as fetching chatdata progresses.

    end : int :
        target videoOffsetTimeMs of last chat data for extract,
        equals to first videoOffsetTimeMs of next block.
        when extract worker reaches this offset, stop fetching.

    continuation : str :
        continuation param of last chat data.

    chat_data : list
        when omitted (None), every block gets its own new empty list.

    done : bool :
        whether this block has been fetched.

    remaining : int :
        remaining data to extract.
        equals end - last.

    is_last : bool :
        whether this block is the last one in blocklist.

    during_split : bool :
        whether this block is in the process of during_split.
        while True, this block is excluded from duplicate split procedure.
    """

    __slots__ = ['first','last','end','continuation','chat_data','remaining',
        'done','is_last','during_split']

    def __init__(self, first = 0, last = 0, end = 0,
                 continuation = '', chat_data = None, is_last = False,
                 during_split = False):
        self.first = first
        self.last = last
        self.end = end
        self.continuation = continuation
        # A literal `[]` default would be one list shared by every block
        # created without chat_data, so the list is created per instance.
        self.chat_data = [] if chat_data is None else chat_data
        self.done = False
        self.remaining = self.end - self.last
        self.is_last = is_last
        self.during_split = during_split
|
||||||
156
pytchat/tool/extract/duplcheck.py
Normal file
156
pytchat/tool/extract/duplcheck.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
from . import parser
|
||||||
|
|
||||||
|
def check_duplicate(chatdata):
    """
    Search `chatdata` for pairs of items that have the same
    offset, id and type.

    Every item is compared, including the last one (the previous
    table size `len(chatdata)-1` left the last item out).

    Parameter
    ----------
    chatdata : list
        chat items readable by parser.get_offset / get_id / get_type.

    Returns
    ----------
    list :
        One dict per duplicated pair (i < j), of the form
        {"i": {...}, "j": {...}}, where each inner dict has the keys
        "index", "id", "offsetTime" and "type".
    """
    max_range = len(chatdata)

    print("creating table...")
    tbl_offset = [parser.get_offset(item) for item in chatdata]
    tbl_id = [parser.get_id(item) for item in chatdata]
    tbl_type = [parser.get_type(item) for item in chatdata]

    def is_duplicate(i, j):
        return (
            tbl_offset[i] == tbl_offset[j]
            and tbl_id[i] == tbl_id[j]
            and tbl_type[i] == tbl_type[j]
        )

    def describe(index):
        # The tables hold exactly what the parser returns for the item.
        return {
            "index" : index, "id" : tbl_id[index],
            "offsetTime" : tbl_offset[index],
            "type" : tbl_type[index]
        }

    print("searching duplicate data...")
    return [{"i" : describe(i), "j" : describe(j)}
            for i in range(max_range) for j in range(i + 1, max_range)
            if is_duplicate(i, j)]
|
||||||
|
|
||||||
|
|
||||||
|
def check_duplicate_offset(chatdata):
    """
    Search `chatdata` for adjacent items that have the same
    offset and id.

    Parameter
    ----------
    chatdata : list
        chat items readable by parser.get_offset / get_id / get_type.

    Returns
    ----------
    list :
        One dict per item that duplicates its immediate successor,
        with the keys "index", "id", "offsetTime" and "type".
        (The last key was previously misspelled as "type:", unlike
        the key used by check_duplicate.)
    """
    max_range = len(chatdata)

    print("creating table...")
    tbl_offset = [parser.get_offset(item) for item in chatdata]
    tbl_id = [parser.get_id(item) for item in chatdata]
    tbl_type = [parser.get_type(item) for item in chatdata]

    def is_duplicate(i, j):
        # The type is reported in the result but not compared.
        return (
            tbl_offset[i] == tbl_offset[j]
            and tbl_id[i] == tbl_id[j]
        )

    print("searching duplicate data...")
    return [{
        "index" : i, "id" : tbl_id[i],
        "offsetTime" : tbl_offset[i],
        "type" : tbl_type[i]
    }
        for i in range(max_range - 1)
        if is_duplicate(i, i + 1)]
|
||||||
|
|
||||||
|
def remove_duplicate_head(blocks):
    """
    Drop blocks that are empty or start with the same chat data
    as the block following them. The last block is always kept.

    Returns the given list itself when it has fewer than two blocks,
    otherwise a new list.
    """
    if len(blocks) <= 1:
        return blocks

    kept = []
    for current, following in zip(blocks, blocks[1:]):
        if len(current.chat_data) == 0:
            continue
        if len(following.chat_data) != 0:
            current_id = parser.get_id(current.chat_data[0])
            following_id = parser.get_id(following.chat_data[0])
            current_type = parser.get_type(current.chat_data[0])
            following_type = parser.get_type(following.chat_data[0])
            same_head = (
                current.first == following.first
                and current_id == following_id
                and current_type == following_type
            )
            if same_head:
                continue
        kept.append(current)
    kept.append(blocks[-1])
    return kept
|
||||||
|
|
||||||
|
def remove_duplicate_tail(blocks):
    """
    Drop blocks (other than the first) that are empty or end with
    the same chat data as the block preceding them.

    Returns the given list itself when it has fewer than two blocks,
    otherwise a new list.
    """
    if len(blocks) <= 1:
        return blocks

    kept = [blocks[0]]
    for previous, current in zip(blocks, blocks[1:]):
        if len(current.chat_data) == 0:
            continue
        if len(previous.chat_data) != 0:
            previous_id = parser.get_id(previous.chat_data[-1])
            current_id = parser.get_id(current.chat_data[-1])
            previous_type = parser.get_type(previous.chat_data[-1])
            current_type = parser.get_type(current.chat_data[-1])
            same_tail = (
                previous.last == current.last
                and previous_id == current_id
                and previous_type == current_type
            )
            if same_tail:
                continue
        kept.append(current)
    return kept
|
||||||
|
|
||||||
|
def remove_overlap(blocks):
    """
    Fix overlapped blocks after ready_blocks().
    Align the last offset of each block to the first offset
    of next block (equals `end` offset of each block).

    Blocks are modified in place; processing stops at the block
    flagged `is_last`. The given list is returned.
    """
    if len(blocks) <= 1:
        return blocks

    for blk in blocks:
        if blk.is_last:
            break
        if len(blk.chat_data) == 0:
            continue
        limit = blk.end
        if blk.last < limit:
            # This block does not reach into the next one.
            continue
        # Drop trailing chat data whose offset is at or past `limit`.
        for line in reversed(blk.chat_data):
            if parser.get_offset(line) < limit:
                break
            blk.chat_data.pop()
        blk.last = parser.get_offset(line)
        blk.remaining = 0
        blk.done = True
        blk.continuation = None
    return blocks
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _dump(blocks):
    """
    Print `first`, `last` and `end` of every block, one line per block.
    """
    print("---------- first last end---")
    for number, entry in enumerate(blocks):
        columns = f"{entry.first:>10} {entry.last:>10} {entry.end:>10}"
        print(f"block[{number:3}] {columns}")
|
||||||
92
pytchat/tool/extract/extractor.py
Normal file
92
pytchat/tool/extract/extractor.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
from . import asyncdl
|
||||||
|
from . import duplcheck
|
||||||
|
from . import parser
|
||||||
|
from .. videoinfo import VideoInfo
|
||||||
|
from ... import config
|
||||||
|
from ... exceptions import InvalidVideoIdException
|
||||||
|
|
||||||
|
logger = config.logger(__name__)
|
||||||
|
headers=config.headers
|
||||||
|
|
||||||
|
class Extractor:
    """
    Extractor downloads the replay chat data of a video
    by fetching several blocks of it in parallel.

    Parameter
    ----------
    video_id : str
    div : int
        number of pieces to divide the video into.
        Values greater than 10 are treated as 10.
    callback : func
        passed on to asyncdl.ready_blocks / asyncdl.fetch_patch.
    processor :
        when given, extract() returns the result of its process().
    """

    def __init__(self, video_id, div = 1, callback = None, processor = None):
        if not isinstance(div, int) or div < 1:
            raise ValueError('div must be positive integer.')
        self.video_id = video_id
        self.div = min(div, 10)
        self.callback = callback
        self.processor = processor
        self.duration = self._get_duration_of_video(video_id)
        self.blocks = []

    def _get_duration_of_video(self, video_id):
        # InvalidVideoIdException raised here is left to the caller.
        return VideoInfo(video_id).get_duration()

    def _ready_blocks(self):
        fetched = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
        # Falsy entries (chunks without chat data) are dropped.
        self.blocks = [entry for entry in fetched if entry]
        return self

    def _remove_duplicate_head(self):
        self.blocks = duplcheck.remove_duplicate_head(self.blocks)
        return self

    def _set_block_end(self):
        if self.blocks:
            # Each block ends where the next one begins.
            for current, following in zip(self.blocks, self.blocks[1:]):
                current.end = following.first
            final = self.blocks[-1]
            final.end = self.duration * 1000
            final.is_last = True
        return self

    def _remove_overlap(self):
        self.blocks = duplcheck.remove_overlap(self.blocks)
        return self

    def _download_blocks(self):
        asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
        return self

    def _remove_duplicate_tail(self):
        self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
        return self

    def _combine(self):
        combined = []
        for entry in self.blocks:
            combined += entry.chat_data
        return combined

    def _execute_extract_operations(self):
        self._ready_blocks()
        self._remove_duplicate_head()
        self._set_block_end()
        self._remove_overlap()
        self._download_blocks()
        self._remove_duplicate_tail()
        return self._combine()

    def extract(self):
        """
        Return the chat data of the video: the combined list as is,
        or the result of `processor.process()` when a processor is set.
        Returns an empty list when the duration is 0.
        """
        if self.duration == 0:
            print("video is not archived.")
            return []
        data = self._execute_extract_operations()
        if self.processor is None:
            return data
        chatdata = (action["replayChatItemAction"]["actions"][0]
                    for action in data)
        return self.processor.process(
            [{'video_id':None,'timeout':1,'chatdata' : chatdata}]
        )

    def cancel(self):
        asyncdl.cancel()
|
||||||
54
pytchat/tool/extract/parser.py
Normal file
54
pytchat/tool/extract/parser.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import json
|
||||||
|
from ... import config
|
||||||
|
from ... exceptions import (
|
||||||
|
ResponseContextError,
|
||||||
|
NoContentsException,
|
||||||
|
NoContinuationsException )
|
||||||
|
|
||||||
|
logger = config.logger(__name__)
|
||||||
|
|
||||||
|
def parse(jsn):
    """
    Parse replay chat data.

    Parameter:
    ----------
    jsn : dict
        JSON of replay chat data.

    Returns:
    ------
    continuation : str
    actions : list
        value of 'actions' in the chat data (None when absent).
        (None, []) is returned when the first continuation has no
        'liveChatReplayContinuationData'.
    """
    if jsn is None:
        raise ValueError("parameter JSON is None")
    response = jsn['response']
    if response['responseContext'].get('errors'):
        raise ResponseContextError(
            'video_id is invalid or private/deleted.')
    contents = response.get('continuationContents')
    if contents is None:
        raise NoContentsException('No chat data.')

    live_chat = contents['liveChatContinuation']
    cont = live_chat['continuations'][0]
    if cont is None:
        raise NoContinuationsException('No Continuation')
    metadata = cont.get('liveChatReplayContinuationData')
    if not metadata:
        return None, []
    return metadata.get("continuation"), live_chat.get('actions')
|
||||||
|
|
||||||
|
|
||||||
|
def get_offset(item):
    """Return 'videoOffsetTimeMsec' of a replay chat item as int."""
    replay_action = item['replayChatItemAction']
    return int(replay_action["videoOffsetTimeMsec"])
|
||||||
|
|
||||||
|
def get_id(item):
    """
    Return the 'id' of the renderer held by the first action
    of a replay chat item (None when the renderer has no 'id').
    """
    first_action = item['replayChatItemAction']["actions"][0]
    payload = list(first_action.values())[0]
    renderer = list(payload['item'].values())[0]
    return renderer.get('id')
|
||||||
|
|
||||||
|
def get_type(item):
    """
    Return the renderer name (the first key of 'item') held by
    the first action of a replay chat item.
    """
    first_action = item['replayChatItemAction']["actions"][0]
    payload = list(first_action.values())[0]
    return list(payload['item'].keys())[0]
|
||||||
|
|
||||||
|
|
||||||
54
pytchat/tool/extract/patch.py
Normal file
54
pytchat/tool/extract/patch.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
class Patch(NamedTuple):
    """
    A chunk of chat data fetched by asyncdl.fetch_patch._fetch().

    Every field is optional; a Patch built without chat data keeps
    `first` and `last` as None.
    """
    # Chat items of this chunk.
    # NOTE(review): the default list object is shared by every Patch
    # created without `chats` - avoid mutating it in place.
    chats : list = []
    # Continuation param returned together with this chunk.
    continuation : str = None
    # Offset of the first chat item.
    first : int = None
    # Offset of the last chat item.
    last : int = None
|
||||||
|
|
||||||
|
def fill(block:Block, patch:Patch):
    """
    Add the chat data of `patch` to `block`.

    When the patch reaches the end of the block (and the block is not
    the last one), chat data at or past `block.end` is removed from the
    patch before it is added, and the block is marked as done.

    Parameter
    ----------
    block : Block
        block to be filled. Modified in place.
    patch : Patch
        fetched chunk. `patch.chats` may be shortened in place.
    """
    if patch.last is None:
        # A Patch created with only a continuation has no chat data and
        # `last` of None; `None < block.end` would raise TypeError.
        # Only the continuation is advanced in that case.
        block.continuation = patch.continuation
        return

    block_end = block.end
    if patch.last < block_end or block.is_last:
        set_patch(block, patch)
        return

    # Fallback for a patch without chats, so `line_offset` is always bound.
    line_offset = patch.last
    # Drop trailing chat data whose offset is at or past the block end.
    for line in reversed(patch.chats):
        line_offset = parser.get_offset(line)
        if line_offset < block_end:
            break
        patch.chats.pop()
    set_patch(block, patch._replace(
        continuation = None,
        last = line_offset
    ))
    block.remaining = 0
    block.done = True
|
||||||
|
|
||||||
|
|
||||||
|
def split(parent_block:Block, child_block:Block, patch:Patch):
    """
    Finish splitting `parent_block`, using the first patch fetched
    for `child_block`.

    When the patch starts after the data the parent already holds, the
    child takes over from `patch.first`: the parent's `end` is moved
    there and the patch is filled into the child. Otherwise the child
    is discarded.
    """
    parent_block.during_split = False
    overlaps_parent = patch.first <= parent_block.last
    if overlaps_parent:
        # When patch overlaps with parent_block, discard this block.
        child_block.continuation = None
        # Leave child_block.during_split == True
        # to exclude from during_split sequence.
        return
    child_block.during_split = False
    split_point = patch.first
    child_block.first = split_point
    parent_block.end = split_point
    fill(child_block, patch)
|
||||||
|
|
||||||
|
|
||||||
|
def set_patch(block:Block, patch:Patch):
    """
    Append the chats of `patch` to `block` and take over its
    continuation and last offset; `remaining` is recalculated.
    """
    block.chat_data.extend(patch.chats)
    block.continuation = patch.continuation
    block.last = patch.last
    block.remaining = block.end - patch.last
|
||||||
|
|
||||||
90
pytchat/tool/extract/worker.py
Normal file
90
pytchat/tool/extract/worker.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from . patch import Patch, fill, split
|
||||||
|
from ... paramgen import arcparam
|
||||||
|
|
||||||
|
class ExtractWorker:
    """
    ExtractWorker associates a download session with a block.

    When the worker finishes fetching its block, a block that is
    still being fetched is split and the new part is assigned
    to this (now free) worker.

    Parameter
    ----------
    fetch : func :
        extract function of asyncdl

    block : Block :
        Block object that includes chat_data

    blocks : list :
        List of Block(s)

    video_id : str :

    parent_block : Block :
        the block from which current block is splitted
    """
    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']

    def __init__(self, fetch, block, blocks, video_id ):
        self.fetch = fetch
        self.block = block
        self.blocks = blocks
        self.video_id = video_id
        self.parent_block = None

    async def run(self, session):
        """Fetch patches until the current block has no continuation."""
        while self.block.continuation:
            fetched = await self.fetch(self.block.continuation, session)
            if fetched.continuation is None:
                # TODO : make the worker assigned to the last block
                #        to work more than twice as possible.
                break
            parent = self.parent_block
            if parent:
                # First patch of a freshly split block.
                split(parent, self.block, fetched)
                self.parent_block = None
            else:
                fill(self.block, fetched)
            if self.block.continuation is not None:
                continue
            # finished fetching this block
            self.block.done = True
            self.block = _search_new_block(self)
|
||||||
|
|
||||||
|
def _search_new_block(worker) -> Block:
    """
    Pick the unfinished block chosen by `_get_undone_block` and create
    a new block that starts at the middle of its remaining section.

    The new block is inserted right after the picked block in
    `worker.blocks`, and the picked block becomes `worker.parent_block`.
    When nothing is left to split, a Block without continuation
    is returned.
    """
    index, parent = _get_undone_block(worker.blocks)
    if parent is None:
        return Block(continuation = None)

    # Offsets are in milliseconds; seektime is passed in seconds.
    midpoint = (parent.last + parent.end) / 2
    continuation = arcparam.getparam(worker.video_id,
                                     seektime = midpoint / 1000)
    worker.parent_block = parent
    parent.during_split = True
    child = Block(
        end = parent.end,
        chat_data = [],
        continuation = continuation,
        during_split = True,
        is_last = parent.is_last)
    # swap last block
    if parent.is_last:
        parent.is_last = False
    worker.blocks.insert(index + 1, child)
    return child
|
||||||
|
|
||||||
|
def _get_undone_block(blocks) -> (int, Block):
    """
    Find the block with the largest `remaining` among the blocks that
    are neither done nor being split.

    Only blocks whose `remaining` exceeds 120000 are considered.
    Returns (index, block), or (0, None) when no block qualifies.
    """
    min_interval_ms = 120000
    best_index = 0
    best_block = None
    best_remaining = 0
    for position, candidate in enumerate(blocks):
        if candidate.done or candidate.during_split:
            continue
        left = candidate.remaining
        if left <= best_remaining or left <= min_interval_ms:
            continue
        best_index, best_block, best_remaining = position, candidate, left
    return best_index, best_block
|
||||||
0
pytchat/tool/mining/__init__.py
Normal file
0
pytchat/tool/mining/__init__.py
Normal file
141
pytchat/tool/mining/asyncdl.py
Normal file
141
pytchat/tool/mining/asyncdl.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from . worker import ExtractWorker
|
||||||
|
from . patch import Patch
|
||||||
|
from ... import config
|
||||||
|
from ... paramgen import arcparam_mining as arcparam
|
||||||
|
from concurrent.futures import CancelledError
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
headers = config.headers
|
||||||
|
REPLAY_URL = "https://www.youtube.com/live_chat_replay?continuation="
|
||||||
|
INTERVAL = 1
|
||||||
|
def _split(start, end, count, min_interval_sec = 120):
    """
    Split section from `start` to `end` into `count` pieces,
    and returns the beginning of each piece.
    The `count` is reduced so that the length of each piece
    is no smaller than `min_interval_sec` (never below one piece).

    Parameter
    ----------
    start : int or float
    end : int or float
        must be equal to or greater than `start`.
    count : int
        requested number of pieces, 1 or more.
    min_interval_sec : int
        minimum length of a piece.

    Returns:
    --------
    List of the offset of each block's first chat data,
    sorted and without duplicates. When only one piece remains,
    the list is `[start]` (not converted to int).

    Raises:
    --------
    ValueError
        when an argument has a wrong type or is out of range.
    """
    if not isinstance(start, (int, float)) or \
            not isinstance(end, (int, float)):
        raise ValueError("start/end must be int or float")
    if not isinstance(count, int):
        raise ValueError("count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")

    span = end - start
    if span / count < min_interval_sec:
        # Too many pieces requested: use as many as fit, at least one.
        count = max(int(span / min_interval_sec), 1)
    if count == 1:
        return [start]

    interval = span / count
    # int() truncation can map two pieces to the same offset,
    # so duplicates are removed with a set before sorting.
    return sorted({int(start + interval * j) for j in range(count)})
|
||||||
|
|
||||||
|
def ready_blocks(video_id, duration, div, callback):
    """
    Fetch the initial blocks.

    The section from 0 to `duration` is divided with `_split()`, and
    the chat data at each resulting seek position is fetched
    concurrently on the current event loop.

    Parameter
    ----------
    video_id : str
    duration : int
        end of the section passed to `_split()`.
        NOTE(review): presumably the video length in seconds - confirm.
    div : int
        requested number of pieces. Must be greater than 0.
    callback : func
        called as callback(actions, INTERVAL) for every fetched chunk.
        Skipped when falsy.

    Returns
    ----------
    list :
        One entry per seek position: a Block, or None when the
        response text was None.

    Raises
    ----------
    ValueError
        when `div` is not greater than 0.
    """
    if div <= 0:
        raise ValueError("div must be greater than 0.")

    async def _get_blocks(video_id, duration, div, callback):
        async with aiohttp.ClientSession() as session:
            tasks = [_create_block(session, video_id, seektime, callback)
                     for seektime in _split(0, duration, div)]
            return await asyncio.gather(*tasks)

    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime = seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        async with session.get(url, headers = headers) as resp:
            chat_json = await resp.text()
        if chat_json is None:
            return
        # The chat data is the second element of the decoded response.
        continuation, actions = parser.parse(json.loads(chat_json)[1])
        first = seektime
        # The block covers one INTERVAL starting at the seek position.
        seektime += INTERVAL
        if callback:
            callback(actions, INTERVAL)
        return Block(
            continuation = continuation,
            chat_data = actions,
            first = first,
            last = seektime,
            seektime = seektime
        )

    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
    return blocks
|
||||||
|
|
||||||
|
def fetch_patch(callback, blocks, video_id):
    """
    Allocate workers and assign blocks.

    One ExtractWorker is created per block; all workers share one
    aiohttp session and run concurrently on the current event loop
    until they finish or are cancelled.

    Parameter
    ----------
    callback : func
        called as callback(actions, INTERVAL) for every fetched patch.
        Skipped when falsy.
    blocks : list
        List of Block(s) handed to every worker.
    video_id : str
    """

    async def _allocate_workers():
        workers = [
            ExtractWorker(
                fetch = _fetch, block = block,
                blocks = blocks, video_id = video_id
            )
            for block in blocks
        ]
        async with aiohttp.ClientSession() as session:
            tasks = [worker.run(session) for worker in workers]
            return await asyncio.gather(*tasks)

    async def _fetch(seektime, session) -> Patch:
        continuation = arcparam.getparam(video_id, seektime = seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        async with session.get(url, headers = config.headers) as resp:
            chat_json = await resp.text()
        actions = []
        try:
            if chat_json is None:
                return Patch()
            # The chat data is the second element of the decoded response.
            continuation, actions = parser.parse(json.loads(chat_json)[1])
        except json.JSONDecodeError:
            # Best effort: an undecodable response yields a patch without
            # chats that keeps the continuation used for the request.
            pass
        if callback:
            callback(actions, INTERVAL)
        # NOTE(review): relies on this package's Patch accepting a
        # `seektime` field - its definition is not visible here.
        return Patch(chats = actions, continuation = continuation,
                     seektime = seektime, last = seektime)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(_allocate_workers())
    except (CancelledError, asyncio.CancelledError):
        # Since Python 3.8 asyncio.CancelledError is a separate class from
        # concurrent.futures.CancelledError, so both are listed to keep
        # swallowing the cancellation triggered by cancel().
        pass
|
||||||
|
|
||||||
|
async def _shutdown():
    """
    Cancel every task of the running loop except the calling one,
    waiting for each task to finish before cancelling the next.
    """
    print("\nshutdown...")
    others = []
    for candidate in asyncio.all_tasks():
        if candidate is not asyncio.current_task():
            others.append(candidate)
    for pending in others:
        pending.cancel()
        try:
            await pending
        except asyncio.CancelledError:
            # Expected result of the cancel() above.
            pass
|
||||||
|
|
||||||
|
def cancel():
    """
    Schedule `_shutdown()` as a task on the current event loop.
    """
    asyncio.get_event_loop().create_task(_shutdown())
|
||||||
|
|
||||||
62
pytchat/tool/mining/block.py
Normal file
62
pytchat/tool/mining/block.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
from . import parser
|
||||||
|
class Block:
    """Block object represents something like a box
    to join chunk of chatdata.

    Parameter:
    ---------
    first : int :
        videoOffsetTimeMs of the first chat_data
        (chat_data[0])

    last : int :
        videoOffsetTimeMs of the last chat_data.
        (chat_data[-1])

        this value increases as fetching chatdata progresses.

    end : int :
        target videoOffsetTimeMs of last chat data for extract,
        equals to first videoOffsetTimeMs of next block.
        when extract worker reaches this offset, stop fetching.

    continuation : str :
        continuation param of last chat data.

    chat_data : list
        when omitted (None), every block gets its own new empty list.

    done : bool :
        whether this block has been fetched.

    remaining : int :
        remaining data to extract.
        equals end - last.

    is_last : bool :
        whether this block is the last one in blocklist.

    during_split : bool :
        whether this block is in the process of during_split.
        while True, this block is excluded from duplicate split procedure.

    seektime : float :
        the last position of this block(seconds) already fetched.
    """

    __slots__ = ['first','last','end','continuation','chat_data','remaining',
        'done','is_last','during_split','seektime']

    def __init__(self, first = 0, last = 0, end = 0,
                 continuation = '', chat_data = None, is_last = False,
                 during_split = False, seektime = None):
        self.first = first
        self.last = last
        self.end = end
        self.continuation = continuation
        # A literal `[]` default would be one list shared by every block
        # created without chat_data, so the list is created per instance.
        self.chat_data = [] if chat_data is None else chat_data
        self.done = False
        self.remaining = self.end - self.last
        self.is_last = is_last
        self.during_split = during_split
        self.seektime = seektime
|
||||||
|
|
||||||
67
pytchat/tool/mining/parser.py
Normal file
67
pytchat/tool/mining/parser.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import json
|
||||||
|
from ... import config
|
||||||
|
from ... exceptions import (
|
||||||
|
ResponseContextError,
|
||||||
|
NoContentsException,
|
||||||
|
NoContinuationsException )
|
||||||
|
|
||||||
|
logger = config.logger(__name__)
|
||||||
|
|
||||||
|
def parse(jsn):
    """
    Parse replay chat data and keep only the paid items.

    Parameter:
    ----------
    jsn : dict
        JSON of replay chat data.

    Returns:
    ------
    continuation : str
        value of liveChatReplayContinuationData.continuation, or None
        when the first continuation entry carries no such value.
    actions : list
        first inner action of every replayChatItemAction whose item holds
        a liveChatPaidMessageRenderer or a liveChatPaidStickerRenderer.
        Empty when there is no continuation or no matching action.

    Exception:
    ------
    ValueError :
        jsn is None.
    ResponseContextError :
        responseContext of the response contains 'errors'.
    NoContentsException :
        the response has no 'continuationContents'.
    NoContinuationsException :
        the first continuation entry is None.
    """
    if jsn is None:
        raise ValueError("parameter JSON is None")
    if jsn['response']['responseContext'].get('errors'):
        raise ResponseContextError(
            'video_id is invalid or private/deleted.')
    contents = jsn["response"].get('continuationContents')
    if contents is None:
        raise NoContentsException('No chat data.')

    live_chat = contents['liveChatContinuation']
    cont = live_chat['continuations'][0]
    if cont is None:
        raise NoContinuationsException('No Continuation')
    metadata = cont.get('liveChatReplayContinuationData')
    if not metadata:
        return None, []
    continuation = metadata.get("continuation")
    if not continuation:
        return None, []
    # .get('actions') returns None when the key is missing; iterating
    # None would raise TypeError, so fall back to an empty list.
    actions = live_chat.get('actions') or []
    paid_actions = []
    for action in actions:
        inner = action["replayChatItemAction"]["actions"][0]
        if _is_paid_action(inner):
            paid_actions.append(inner)
    return continuation, paid_actions


def _is_paid_action(inner):
    """Return True when the action's item is a paid message or sticker."""
    body = list(inner.values())[0]
    # Actions that carry no 'item' dict cannot be paid items; skip them
    # instead of failing with KeyError/AttributeError.
    item = body.get('item') if isinstance(body, dict) else None
    if not isinstance(item, dict):
        return False
    return bool(item.get("liveChatPaidMessageRenderer")
                or item.get("liveChatPaidStickerRenderer"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_offset(item):
    """Return the 'videoOffsetTimeMsec' of the replayChatItemAction as int."""
    replay_action = item['replayChatItemAction']
    offset = replay_action["videoOffsetTimeMsec"]
    return int(offset)
|
||||||
|
|
||||||
|
def get_id(item):
    """Return the 'id' of the first renderer in the item's first action.

    The value is None when that renderer has no 'id' key.
    """
    first_action = item['replayChatItemAction']["actions"][0]
    action_body = list(first_action.values())[0]
    renderer = list(action_body['item'].values())[0]
    return renderer.get('id')
|
||||||
|
|
||||||
|
def get_type(item):
    """Return the first key of the 'item' dict in the item's first action."""
    first_action = item['replayChatItemAction']["actions"][0]
    action_body = list(first_action.values())[0]
    renderer_names = list(action_body['item'].keys())
    return renderer_names[0]
|
||||||
|
import re
|
||||||
|
# Captures the object literal assigned to window["ytInitialData"].
_REGEX_YTINIT = re.compile("window\\[\"ytInitialData\"\\]\\s*=\\s*({.+?});\\s+")


def extract(text):
    """Return the first ytInitialData object literal found in *text*.

    *text* is converted with str() before searching. None is returned
    when the pattern does not occur.
    """
    found = _REGEX_YTINIT.search(str(text))
    if found is None:
        return None
    return found.group(1)
|
||||||
27
pytchat/tool/mining/patch.py
Normal file
27
pytchat/tool/mining/patch.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
class Patch(NamedTuple):
    """
    Patch represents chunk of chat data
    which is fetched by asyncdl.fetch_patch._fetch().

    All fields are optional; set_patch() copies them onto a Block.
    """
    # Chat items of this chunk; set_patch() appends them to block.chat_data.
    # NOTE(review): this [] default is one list object shared by every
    # Patch created without `chats` - safe only while it is never mutated.
    chats : list = []
    # Continuation parameter that set_patch() stores on the block.
    continuation : str = None
    # Stored by set_patch() as both block.last and block.seektime.
    seektime : float = None
    # NOTE(review): not read anywhere in this module - presumably the
    # offset of the first chat item; confirm against the fetcher.
    first : int = None
    # Compared with block.end by fill() to decide whether to apply the patch.
    last : int = None
|
||||||
|
|
||||||
|
def fill(block:Block, patch:Patch):
    """Apply *patch* to *block*, or stop the block at its end.

    When patch.last is below block.end the patch is applied via
    set_patch(). Otherwise block.continuation is set to None and the
    patch is discarded.
    """
    if not patch.last < block.end:
        block.continuation = None
        return
    set_patch(block, patch)
|
||||||
|
|
||||||
|
def set_patch(block:Block, patch:Patch):
    """Copy the content of *patch* onto *block*.

    The block takes over the patch's continuation, has the patch's chats
    appended to its chat_data, and has both `last` and `seektime` set to
    patch.seektime.
    """
    block.continuation = patch.continuation
    block.chat_data.extend(patch.chats)
    # NOTE(review): block.last receives patch.seektime, not patch.last -
    # confirm this is intended.
    block.last = block.seektime = patch.seektime
|
||||||
|
|
||||||
72
pytchat/tool/mining/superchat_miner.py
Normal file
72
pytchat/tool/mining/superchat_miner.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
from . import asyncdl
|
||||||
|
from . import parser
|
||||||
|
from .. videoinfo import VideoInfo
|
||||||
|
from ... import config
|
||||||
|
from ... exceptions import InvalidVideoIdException
|
||||||
|
logger = config.logger(__name__)
|
||||||
|
headers=config.headers
|
||||||
|
|
||||||
|
class SuperChatMiner:
    """
    SuperChatMiner collects chat data of a video block by block.

    Parameter
    ----------
    video_id : str
    duration : int
        must be a positive integer; it becomes the `end` of the last block.
    div : int
        must be a positive integer; values above 10 are capped to 10.
    callback : func
        passed through to asyncdl.ready_blocks() and asyncdl.fetch_patch().

    Exception
    ---------
    ValueError :
        div or duration is not a positive integer.
    """
    def __init__(self, video_id, duration, div, callback):
        if not isinstance(div ,int) or div < 1:
            raise ValueError('div must be positive integer.')
        elif div > 10:
            div = 10
        if not isinstance(duration ,int) or duration < 1:
            raise ValueError('duration must be positive integer.')
        self.video_id = video_id
        self.duration = duration
        self.div = div
        self.callback = callback
        self.blocks = []

    def _ready_blocks(self):
        """Build the block list, dropping the None entries asyncdl returns."""
        blocks = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
        self.blocks = [block for block in blocks if block is not None]
        return self

    def _set_block_end(self):
        """Set `end` of each block to `first` of the following block.

        The last block ends at self.duration and is flagged `is_last`.
        """
        if not self.blocks:
            # Every ready block was None: there is no last block to mark,
            # and indexing [-1] would raise IndexError.
            return self
        for current, following in zip(self.blocks, self.blocks[1:]):
            current.end = following.first
        last_block = self.blocks[-1]
        last_block.end = self.duration
        last_block.is_last = True
        return self

    def _download_blocks(self):
        """Let asyncdl fill the chat data of all blocks."""
        asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
        return self

    def _combine(self):
        """Return the chat data of all blocks as one list, in block order."""
        ret = []
        for block in self.blocks:
            ret.extend(block.chat_data)
        return ret

    def extract(self):
        """Run the whole pipeline and return the combined chat data."""
        return (
            self._ready_blocks()
                ._set_block_end()
                ._download_blocks()
                ._combine()
        )
|
||||||
|
|
||||||
|
def extract(video_id, div = 1, callback = None, processor = None):
    """Collect the chat data of *video_id* with SuperChatMiner.

    Returns an empty list (after printing a notice) when the duration
    reported by VideoInfo is 0. Otherwise returns the mined data, or the
    result of processor.process() when a processor is given.
    Exceptions raised by VideoInfo (e.g. InvalidVideoIdException)
    propagate to the caller unchanged.
    """
    duration = VideoInfo(video_id).get_duration()
    if duration == 0:
        print("video is live.")
        return []
    data = SuperChatMiner(video_id, duration, div, callback).extract()
    if processor is not None:
        # The processor receives a single chat component whose chatdata
        # is a generator over the mined actions.
        chat_component = {'video_id':None,'timeout':1,'chatdata' : (action
                          for action in data)}
        return processor.process([chat_component])
    return data
|
||||||
|
|
||||||
|
def cancel():
    """Delegate to asyncdl.cancel()."""
    asyncdl.cancel()
|
||||||
45
pytchat/tool/mining/worker.py
Normal file
45
pytchat/tool/mining/worker.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from . import parser
|
||||||
|
from . block import Block
|
||||||
|
from . patch import Patch, fill
|
||||||
|
from ... paramgen import arcparam
|
||||||
|
INTERVAL = 1
|
||||||
|
class ExtractWorker:
    """
    ExtractWorker associates a download session with a block.

    When the worker finishes fetching, the block
    being fetched is split and assigned to the free worker.

    Parameter
    ----------
    fetch : func :
        extract function of asyncdl

    block : Block :
        Block object that includes chat_data

    blocks : list :
        List of Block(s)

    video_id : str :

    Attribute
    ----------
    parent_block : Block :
        the block from which current block is split
        (initialised to None).
    """
    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']

    def __init__(self, fetch, block, blocks, video_id ):
        self.fetch = fetch
        self.block:Block = block
        self.blocks:list = blocks
        self.video_id:str = video_id
        self.parent_block:Block = None

    async def run(self, session):
        """Fetch patches until the block has no continuation, then mark it done.

        After each patch is applied with fill(), the block's seektime is
        advanced by INTERVAL.
        """
        while True:
            if not self.block.continuation:
                break
            fetched = await self.fetch(
                self.block.seektime, session)
            fill(self.block, fetched)
            self.block.seektime += INTERVAL
        self.block.done = True
|
||||||
|
|
||||||
|
|
||||||
153
pytchat/tool/videoinfo.py
Normal file
153
pytchat/tool/videoinfo.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from .. import config
|
||||||
|
from .. import util
|
||||||
|
from ..exceptions import InvalidVideoIdException
|
||||||
|
|
||||||
|
headers = config.headers

# Captures the object passed as 'PLAYER_CONFIG' to yt.setConfig(...);
# VideoInfo._parse() decodes group 1 as JSON.
pattern = re.compile(r"yt\.setConfig\({'PLAYER_CONFIG': ({.*})}\);")

# Every item_* list below is a key path consumed by VideoInfo._get_item():
# entries are applied in order, str entries as dict keys and int entries
# as list indexes.

# Path (inside the renderer) to the channel URL; get_channel_id() strips
# its first nine characters.
item_channel_id =[
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "channelThumbnailEndpoint",
    "channelThumbnailEndpoint",
    "urlEndpoint",
    "urlEndpoint",
    "url"
]

# Path (inside the decoded embedded_player_response) to the renderer.
item_renderer = [
    "embedPreview",
    "thumbnailPreviewRenderer"
]

# Path (inside the decoded PLAYER_CONFIG) to the embedded player response,
# which _parse() decodes with json.loads().
item_response = [
    "args",
    "embedded_player_response"
]

# Path (inside the renderer) to the URL of the first channel thumbnail.
item_author_image =[
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "channelThumbnail",
    "thumbnails",
    0,
    "url"
]

# Path (inside the renderer) to the URL of the third default thumbnail.
item_thumbnail = [
    "defaultThumbnail",
    "thumbnails",
    2,
    "url"
]

# Path (inside the renderer) to the text of the first title run of the
# expanded overlay; returned by get_channel_name().
item_channel_name = [
    "videoDetails",
    "embeddedPlayerOverlayVideoDetailsRenderer",
    "expandedRenderer",
    "embeddedPlayerOverlayVideoDetailsExpandedRenderer",
    "title",
    "runs",
    0,
    "text"
]

# Path (inside the renderer) to the URL of the first moving thumbnail.
item_moving_thumbnail = [
    "movingThumbnail",
    "thumbnails",
    0,
    "url"
]
|
||||||
|
|
||||||
|
class VideoInfo:
    '''
    VideoInfo object retrieves YouTube video information.

    The embed page of the video is downloaded and parsed once in the
    constructor; the get_* methods only read the parsed renderer.

    Parameter
    ---------
    video_id : str

    Exception
    ---------
    InvalidVideoIdException :
        Occurs when video_id does not exist on YouTube.
    '''
    def __init__(self, video_id):
        self.video_id = video_id
        text = self._get_page_text(video_id)
        self._parse(text)

    def _get_page_text(self, video_id):
        '''Download the embed page of the video and return its text.

        HTTP error statuses are raised by raise_for_status().
        '''
        url = f"https://www.youtube.com/embed/{video_id}"
        resp = requests.get(url, headers = headers)
        resp.raise_for_status()
        return resp.text

    def _parse(self, text):
        '''Locate the renderer in the page text and store it in _renderer.

        Raises InvalidVideoIdException when the player config, the
        embedded player response or the renderer cannot be found.
        '''
        result = re.search(pattern, text)
        if result is None:
            # Without this guard a page lacking PLAYER_CONFIG fails with
            # AttributeError on result.group().
            raise InvalidVideoIdException(
                f"No player config found in video_id: [{self.video_id}].")
        res = json.loads(result.group(1))
        response = self._get_item(res, item_response)
        if response is None:
            # Always raises; the message depends on the content of args.
            self._check_video_is_private(res.get("args"))
        self._renderer = self._get_item(json.loads(response), item_renderer)
        if self._renderer is None:
            raise InvalidVideoIdException(
                f"No renderer found in video_id: [{self.video_id}].")

    def _check_video_is_private(self,args):
        '''Raise InvalidVideoIdException with a message chosen from args.'''
        if args and args.get("video_id"):
            raise InvalidVideoIdException(
                f"video_id [{self.video_id}] is private or deleted.")
        raise InvalidVideoIdException(
            f"video_id [{self.video_id}] is invalid.")

    def _get_item(self, dict_body, items: list):
        '''Follow the key path `items` through nested dicts and lists.

        Returns the value at the end of the path, or None as soon as a
        key is missing, an index is out of range, or the current value
        cannot be descended into with the current path entry.
        '''
        for item in items:
            if dict_body is None:
                break
            if isinstance(dict_body, dict):
                dict_body = dict_body.get(item)
                continue
            # Bound the index on both sides so that an out-of-range
            # negative index yields None instead of IndexError.
            if isinstance(item, int) and \
                    isinstance(dict_body, list) and \
                    -len(dict_body) <= item < len(dict_body):
                dict_body = dict_body[item]
                continue
            return None
        return dict_body

    def get_duration(self):
        '''Return videoDurationSeconds as int, or None when it is absent.'''
        duration_seconds = self._renderer.get("videoDurationSeconds")
        if duration_seconds:
            # Fetched value is string, so cast to integer.
            return int(duration_seconds)
        # When key is not found, explicitly returns None.
        return None

    def get_title(self):
        '''Return the title text, or None when the renderer has no title.

        The texts of all title runs are concatenated.
        '''
        if self._renderer.get("title"):
            return ''.join(run["text"]
                           for run in self._renderer["title"]["runs"])
        return None

    def get_channel_id(self):
        '''Return the channel URL without its first nine characters.

        NOTE(review): the 9-character cut presumably removes a
        "/channel/" prefix - confirm against a real response.
        '''
        channel_url = self._get_item(self._renderer, item_channel_id)
        if channel_url:
            return channel_url[9:]
        return None

    def get_author_image(self):
        '''Return the value at item_author_image, or None.'''
        return self._get_item(self._renderer, item_author_image)

    def get_thumbnail(self):
        '''Return the value at item_thumbnail, or None.'''
        return self._get_item(self._renderer, item_thumbnail)

    def get_channel_name(self):
        '''Return the value at item_channel_name, or None.'''
        return self._get_item(self._renderer, item_channel_name)

    def get_moving_thumbnail(self):
        '''Return the value at item_moving_thumbnail, or None.'''
        return self._get_item(self._renderer, item_moving_thumbnail)
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import requests,json,datetime
|
import requests,json,datetime
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
def download(url):
|
def extract(url):
|
||||||
_session = requests.Session()
|
_session = requests.Session()
|
||||||
html = _session.get(url, headers=config.headers)
|
html = _session.get(url, headers=config.headers)
|
||||||
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
with open(str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
|
||||||
|
|||||||
64
setup.py
64
setup.py
@@ -1,6 +1,5 @@
|
|||||||
from setuptools import setup, find_packages, Command
|
from setuptools import setup, find_packages, Command
|
||||||
#from codecs import open as open_c
|
from os import path, system, remove, rename, removedirs
|
||||||
from os import path, system, remove, rename
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
package_name = "pytchat"
|
package_name = "pytchat"
|
||||||
@@ -8,19 +7,27 @@ package_name = "pytchat"
|
|||||||
root_dir = path.abspath(path.dirname(__file__))
|
root_dir = path.abspath(path.dirname(__file__))
|
||||||
|
|
||||||
def _requirements():
|
def _requirements():
|
||||||
return [name.rstrip() for name in open(path.join(root_dir, 'requirements.txt')).readlines()]
|
return [name.rstrip()
|
||||||
|
for name in open(path.join(
|
||||||
|
root_dir, 'requirements.txt')).readlines()]
|
||||||
|
|
||||||
def _test_requirements():
|
def _test_requirements():
|
||||||
return [name.rstrip() for name in open(path.join(root_dir, 'requirements_test.txt')).readlines()]
|
return [name.rstrip()
|
||||||
|
for name in open(path.join(
|
||||||
|
root_dir, 'requirements_test.txt')).readlines()]
|
||||||
|
|
||||||
with open(path.join(root_dir, package_name, '__init__.py')) as f:
|
with open(path.join(root_dir, package_name, '__init__.py')) as f:
|
||||||
init_text = f.read()
|
init_text = f.read()
|
||||||
version = re.search(r'__version__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
version = re.search(
|
||||||
license = re.search(r'__license__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
r'__version__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
||||||
author = re.search(r'__author__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
license = re.search(
|
||||||
author_email = re.search(r'__author_email__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
r'__license__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
||||||
url = re.search(r'__url__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
author = re.search(
|
||||||
|
r'__author__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
||||||
|
author_email = re.search(
|
||||||
|
r'__author_email__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
||||||
|
url = re.search(
|
||||||
|
r'__url__\s*=\s*[\'\"](.+?)[\'\"]', init_text).group(1)
|
||||||
|
|
||||||
assert version
|
assert version
|
||||||
assert license
|
assert license
|
||||||
@@ -30,43 +37,36 @@ assert url
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
with open('README.MD', 'r', encoding='utf-8') as f:
|
|
||||||
txt = f.read()
|
|
||||||
|
|
||||||
with open('README1.MD', 'w', encoding='utf-8', newline='\n') as f:
|
|
||||||
f.write(txt)
|
|
||||||
|
|
||||||
remove("README.MD")
|
|
||||||
rename("README1.MD","README.MD")
|
|
||||||
with open('README.md', encoding='utf-8') as f:
|
with open('README.md', encoding='utf-8') as f:
|
||||||
long_description = f.read()
|
long_description = f.read()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name=package_name,
|
|
||||||
packages=find_packages(),
|
|
||||||
version=version,
|
|
||||||
url=url,
|
|
||||||
author=author,
|
author=author,
|
||||||
author_email=author_email,
|
author_email=author_email,
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type='text/markdown',
|
|
||||||
license=license,
|
|
||||||
install_requires=_requirements(),
|
|
||||||
tests_require=_test_requirements(),
|
|
||||||
description="a python library for fetching youtube live chat.",
|
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Natural Language :: Japanese',
|
'Natural Language :: Japanese',
|
||||||
'Development Status :: 4 - Beta',
|
'Development Status :: 4 - Beta',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
'Programming Language :: Python :: 3',
|
|
||||||
'Programming Language :: Python :: 3.4',
|
|
||||||
'Programming Language :: Python :: 3.5',
|
|
||||||
'Programming Language :: Python :: 3.6',
|
|
||||||
'Programming Language :: Python :: 3.7',
|
'Programming Language :: Python :: 3.7',
|
||||||
|
'Programming Language :: Python :: 3.8',
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
],
|
],
|
||||||
|
description="a python library for fetching youtube live chat.",
|
||||||
|
entry_points=
|
||||||
|
'''
|
||||||
|
[console_scripts]
|
||||||
|
pytchat=pytchat.cli:main
|
||||||
|
''',
|
||||||
|
install_requires=_requirements(),
|
||||||
keywords='youtube livechat asyncio',
|
keywords='youtube livechat asyncio',
|
||||||
|
license=license,
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type='text/markdown',
|
||||||
|
name=package_name,
|
||||||
|
packages=find_packages(exclude=['*log.txt','*tests','*testrun']),
|
||||||
|
url=url,
|
||||||
|
version=version,
|
||||||
)
|
)
|
||||||
@@ -1,29 +1,28 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from pytchat.parser.replay import Parser
|
from pytchat.parser.live import Parser
|
||||||
import pytchat.config as config
|
import pytchat.config as config
|
||||||
import requests, json
|
import requests, json
|
||||||
from pytchat.paramgen import arcparam
|
from pytchat.paramgen import arcparam
|
||||||
|
|
||||||
def test_arcparam_0(mocker):
|
def test_arcparam_0(mocker):
|
||||||
param = arcparam.getparam("01234567890",-1)
|
param = arcparam.getparam("01234567890",-1)
|
||||||
assert "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoADAAOABAAEgEUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA" == param
|
assert param == "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoADAAOABAAEgEUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA"
|
||||||
|
|
||||||
|
|
||||||
def test_arcparam_1(mocker):
|
def test_arcparam_1(mocker):
|
||||||
param = arcparam.getparam("01234567890", seektime = 100000)
|
param = arcparam.getparam("01234567890", seektime = 100000)
|
||||||
assert "op2w0wR3GjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QogNDbw_QCMAA4AEAASANSHAgAEAAYACAAKg5zdGF0aWNjaGVja3N1bUAAWANgAWgAcgQIARAAeAA%3D" == param
|
assert param == "op2w0wR3GjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QogNDbw_QCMAA4AEAASANSHAgAEAAYACAAKg5zdGF0aWNjaGVja3N1bUAAWANgAWgAcgQIARAAeAA%3D"
|
||||||
|
|
||||||
def test_arcparam_2(mocker):
|
def test_arcparam_2(mocker):
|
||||||
param = arcparam.getparam("SsjCnHOk-Sk")
|
param = arcparam.getparam("SsjCnHOk-Sk")
|
||||||
url=f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?continuation={param}&pbj=1"
|
url=f"https://www.youtube.com/live_chat_replay/get_live_chat_replay?continuation={param}&pbj=1"
|
||||||
resp = requests.Session().get(url,headers = config.headers)
|
resp = requests.Session().get(url,headers = config.headers)
|
||||||
jsn = json.loads(resp.text)
|
jsn = json.loads(resp.text)
|
||||||
parser = Parser()
|
parser = Parser(is_replay=True)
|
||||||
_ , chatdata = parser.parse(jsn)
|
contents= parser.get_contents(jsn)
|
||||||
|
_ , chatdata = parser.parse(contents)
|
||||||
test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatTextMessageRenderer"]["id"]
|
test_id = chatdata[0]["addChatItemAction"]["item"]["liveChatTextMessageRenderer"]["id"]
|
||||||
print(test_id)
|
assert test_id == "CjoKGkNMYXBzZTdudHVVQ0Zjc0IxZ0FkTnFnQjVREhxDSnlBNHV2bnR1VUNGV0dnd2dvZDd3NE5aZy0w"
|
||||||
assert "CjoKGkNMYXBzZTdudHVVQ0Zjc0IxZ0FkTnFnQjVREhxDSnlBNHV2bnR1VUNGV0dnd2dvZDd3NE5aZy0w" == test_id
|
|
||||||
|
|
||||||
def test_arcparam_3(mocker):
|
def test_arcparam_3(mocker):
|
||||||
param = arcparam.getparam("01234567890")
|
param = arcparam.getparam("01234567890")
|
||||||
assert "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoATAAOABAAEgDUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA" == param
|
assert param == "op2w0wRyGjxDZzhhRFFvTE1ERXlNelExTmpjNE9UQWFFLXFvM2JrQkRRb0xNREV5TXpRMU5qYzRPVEFnQVElM0QlM0QoATAAOABAAEgDUhwIABAAGAAgACoOc3RhdGljY2hlY2tzdW1AAFgDYAFoAHIECAEQAHgA"
|
||||||
|
|||||||
40
tests/test_arcparam_mining.py
Normal file
40
tests/test_arcparam_mining.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import pytest
|
||||||
|
from pytchat.tool.mining import parser
|
||||||
|
import pytchat.config as config
|
||||||
|
import requests, json
|
||||||
|
from pytchat.paramgen import arcparam_mining as arcparam
|
||||||
|
|
||||||
|
def test_arcparam_e(mocker):
    """getparam() must raise ValueError when -1 is passed as second argument."""
    with pytest.raises(ValueError):
        arcparam.getparam("01234567890",-1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_arcparam_0(mocker):
    """getparam() with 0 as second argument yields the expected parameter."""
    expected = "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
    assert arcparam.getparam("01234567890",0) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_arcparam_1(mocker):
    """getparam() with seektime = 100000 yields the expected parameter."""
    expected = "op2w0wQzGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABWgUQgMLXL2AEcgIIAXgB"
    param = arcparam.getparam("01234567890", seektime = 100000)
    print(param)
    assert param == expected
|
||||||
|
|
||||||
|
def test_arcparam_2(mocker):
    """Request replay chat with a generated parameter and check the first id.

    This test performs a real HTTP request to YouTube.
    """
    param = arcparam.getparam("PZz9NB0-Z64",1)
    url=f"https://www.youtube.com/live_chat_replay?continuation={param}&playerOffsetMs=1000&pbj=1"
    session = requests.Session()
    resp = session.get(url,headers = config.headers)
    jsn = json.loads(resp.text)
    # Only the second element of the decoded response is handed to parse().
    _ , chatdata = parser.parse(jsn[1])
    first_item = chatdata[0]["addChatItemAction"]["item"]
    test_id = first_item["liveChatPaidMessageRenderer"]["id"]
    print(test_id)
    assert test_id == "ChwKGkNKSGE0YnFJeWVBQ0ZWcUF3Z0VkdGIwRm9R"
|
||||||
|
|
||||||
|
def test_arcparam_3(mocker):
    """getparam() without a second argument yields the expected parameter."""
    expected = "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
    assert arcparam.getparam("01234567890") == expected
|
||||||
140
tests/test_calculator_get_item.py
Normal file
140
tests/test_calculator_get_item.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
from pytchat.processors.superchat.calculator import SuperchatCalculator
|
||||||
|
|
||||||
|
# Bound method under test; every test calls get_item(dict_test, <path>).
get_item = SuperchatCalculator()._get_item

# Nested fixture mixing dicts, lists, an empty list and a list in a list.
dict_test = {
    'root':{
        'node0' : 'value0',
        'node1' : 'value1',
        'node2' : {
            'node2-0' : 'value2-0'
        },

        'node3' : [
            {'node3-0' : 'value3-0'},
            {'node3-1' :
                {'node3-1-0' : 'value3-1-0'}
            }
        ],
        'node4' : [],
        'node5' : [
            [
                {'node5-1-0' : 'value5-1-0'},
                {'node5-1-1' : 'value5-1-1'},
            ],
            {'node5-0' : 'value5-0'},

        ]
    }
}

# Key paths into dict_test; str entries are dict keys, int entries are
# list indexes.

# Plain two-level dict lookup.
items_test0 = [
    'root',
    'node1'
]


# Second key does not exist.
items_test_not_found0 = [
    'root',
    'other_data'
]


# Three-level dict lookup.
items_test_nest = [
    'root',
    'node2',
    'node2-0'
]

# List index followed by a dict key; ends on a dict value.
items_test_list0 = [
    'root',
    'node3',
    1,
    'node3-1'
]

# Same as above, one level deeper; ends on a str value.
items_test_list1 = [
    'root',
    'node3',
    1,
    'node3-1',
    'node3-1-0'
]

# None used as path entry on an empty list.
items_test_list2 = [
    'root',
    'node4',
    None
]

# Path ending on the empty list itself.
items_test_list3 = [
    'root',
    'node4'
]

# Two consecutive list indexes (list nested in a list).
items_test_list_nest = [
    'root',
    'node5',
    0,
    1,
    'node5-1-1'
]

# One key beyond a str leaf.
items_test_list_nest_not_found1 = [
    'root',
    'node5',
    0,
    1,
    'node5-1-1',
    'nodez'
]

# Index 2 is out of range for the two-element 'node3' list.
items_test_not_found1 = [
    'root',
    'node3',
    2,
    'node3-1',
    'node3-1-0'
]

# Out-of-range index with further keys after it.
items_test_not_found2 = [
    'root',
    'node3',
    2,
    'node3-1',
    'node3-1-0',
    'nodex'
]
|
||||||
|
def test_get_items_0():
    """Two-level dict path returns the leaf value."""
    assert get_item(dict_test, items_test0) == 'value1'


def test_get_items_1():
    """Missing key yields None."""
    assert get_item(dict_test, items_test_not_found0) is None


def test_get_items_2():
    """Three-level dict path returns the leaf value."""
    assert get_item(dict_test, items_test_nest) == 'value2-0'


def test_get_items_3():
    """List index followed by a key returns the nested dict."""
    assert get_item(
        dict_test, items_test_list0) == {'node3-1-0' : 'value3-1-0'}


def test_get_items_4():
    """List index followed by two keys returns the leaf value."""
    assert get_item(dict_test, items_test_list1) == 'value3-1-0'


def test_get_items_5():
    """Out-of-range list index yields None."""
    assert get_item(dict_test, items_test_not_found1) is None


def test_get_items_6():
    """Out-of-range list index with trailing keys yields None."""
    assert get_item(dict_test, items_test_not_found2) is None


def test_get_items_7():
    """None as path entry on an empty list yields None."""
    assert get_item(dict_test, items_test_list2) is None


def test_get_items_8():
    """Two consecutive list indexes reach into a nested list."""
    assert get_item(dict_test, items_test_list_nest) == 'value5-1-1'


def test_get_items_9():
    """A key beyond a str leaf yields None."""
    assert get_item(dict_test, items_test_list_nest_not_found1) is None


def test_get_items_10():
    """A path ending on an empty list returns that list."""
    assert get_item(dict_test, items_test_list3) == []
|
||||||
68
tests/test_calculator_parse.py
Normal file
68
tests/test_calculator_parse.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import json
|
||||||
|
from pytchat.parser.live import Parser
|
||||||
|
from pytchat.processors.superchat.calculator import SuperchatCalculator
|
||||||
|
from pytchat.exceptions import ChatParseException
|
||||||
|
parse = SuperchatCalculator()._parse
|
||||||
|
|
||||||
|
|
||||||
|
def _open_file(path):
    """Return the whole content of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='utf-8') as source:
        content = source.read()
    return content
|
||||||
|
|
||||||
|
def load_chatdata(filepath):
    """Load a JSON test file and return the second element of the parse result.

    The file is decoded, passed through Parser(is_replay=True).get_contents()
    and then through Parser.parse().
    """
    parser = Parser(is_replay=True)
    decoded = json.loads(_open_file(filepath))
    contents = parser.get_contents(decoded)
    parsed = parser.parse(contents)
    return parsed[1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_1():
    """A yen amount with a thousands separator is split into symbol and float."""
    renderer ={"purchaseAmountText":{"simpleText":"¥2,000"}}
    parsed = parse(renderer)
    symbol, amount = parsed
    assert symbol == '¥'
    assert amount == 2000.0
|
||||||
|
|
||||||
|
def test_parse_2():
    """A multi-character symbol ending in a newline is kept intact."""
    renderer ={"purchaseAmountText":{"simpleText":"ABC\x0a200"}}
    parsed = parse(renderer)
    symbol, amount = parsed
    assert symbol == 'ABC\x0a'
    assert amount == 200.0
|
||||||
|
|
||||||
|
def test_process_0():
    """
    parse superchat data
    """
    chatdata = load_chatdata(r"tests/testdata/calculator/superchat_0.json")
    chat_component = {
        'video_id':'',
        'timeout':10,
        'chatdata':chatdata
    }
    result = SuperchatCalculator().process([chat_component])
    assert result=={'¥': 6800.0, '€': 2.0}
|
||||||
|
|
||||||
|
def test_process_1():
    """
    parse no superchat data
    """
    chatdata = load_chatdata(r"tests/testdata/calculator/text_only.json")
    chat_component = {
        'video_id':'',
        'timeout':10,
        'chatdata':chatdata
    }
    result = SuperchatCalculator().process([chat_component])
    assert result=={}
|
||||||
|
|
||||||
|
def test_process_2():
    """
    try to parse after replay end

    ChatParseException is expected from loading or processing the data;
    the test fails when neither step raises it.
    """
    try:
        chat_component = {
            'video_id':'',
            'timeout':10,
            'chatdata':load_chatdata(r"tests/testdata/calculator/replay_end.json")
        }
        SuperchatCalculator().process([chat_component])
        # Reached only when nothing above raised. The failure marker must
        # come after process(); placed before it, process() never ran.
        assert False
    except ChatParseException:
        assert True
|
||||||
|
|
||||||
@@ -1,10 +1,11 @@
|
|||||||
import json
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
import asyncio,aiohttp
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
from pytchat.parser.live import Parser
|
from pytchat.parser.live import Parser
|
||||||
from pytchat.processors.compatible.processor import CompatibleProcessor
|
from pytchat.processors.compatible.processor import CompatibleProcessor
|
||||||
from pytchat.exceptions import (
|
from pytchat.exceptions import (
|
||||||
NoLivechatRendererException,NoYtinitialdataException,
|
NoLivechatRendererException, NoYtinitialdataException,
|
||||||
ResponseContextError, NoContentsException)
|
ResponseContextError, NoContentsException)
|
||||||
|
|
||||||
from pytchat.processors.compatible.renderer.textmessage import LiveChatTextMessageRenderer
|
from pytchat.processors.compatible.renderer.textmessage import LiveChatTextMessageRenderer
|
||||||
@@ -14,6 +15,7 @@ from pytchat.processors.compatible.renderer.legacypaid import LiveChatLegacyPaid
|
|||||||
|
|
||||||
parser = Parser(is_replay=False)
|
parser = Parser(is_replay=False)
|
||||||
|
|
||||||
|
|
||||||
def test_textmessage(mocker):
|
def test_textmessage(mocker):
|
||||||
'''api互換processorのテスト:通常テキストメッセージ'''
|
'''api互換processorのテスト:通常テキストメッセージ'''
|
||||||
processor = CompatibleProcessor()
|
processor = CompatibleProcessor()
|
||||||
@@ -22,16 +24,16 @@ def test_textmessage(mocker):
|
|||||||
|
|
||||||
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
||||||
data = {
|
data = {
|
||||||
"video_id" : "",
|
"video_id": "",
|
||||||
"timeout" : 7,
|
"timeout": 7,
|
||||||
"chatdata" : chatdata
|
"chatdata": chatdata
|
||||||
}
|
}
|
||||||
ret = processor.process([data])
|
ret = processor.process([data])
|
||||||
|
|
||||||
assert ret["kind"]== "youtube#liveChatMessageListResponse"
|
assert ret["kind"] == "youtube#liveChatMessageListResponse"
|
||||||
assert ret["pollingIntervalMillis"]==data["timeout"]*1000
|
assert ret["pollingIntervalMillis"] == data["timeout"]*1000
|
||||||
assert ret.keys() == {
|
assert ret.keys() == {
|
||||||
"kind", "etag", "pageInfo", "nextPageToken","pollingIntervalMillis","items"
|
"kind", "etag", "pageInfo", "nextPageToken", "pollingIntervalMillis", "items"
|
||||||
}
|
}
|
||||||
assert ret["pageInfo"].keys() == {
|
assert ret["pageInfo"].keys() == {
|
||||||
"totalResults", "resultsPerPage"
|
"totalResults", "resultsPerPage"
|
||||||
@@ -48,8 +50,9 @@ def test_textmessage(mocker):
|
|||||||
assert ret["items"][0]["snippet"]["textMessageDetails"].keys() == {
|
assert ret["items"][0]["snippet"]["textMessageDetails"].keys() == {
|
||||||
'messageText'
|
'messageText'
|
||||||
}
|
}
|
||||||
assert "LCC." in ret["items"][0]["id"]
|
assert "LCC." in ret["items"][0]["id"]
|
||||||
assert ret["items"][0]["snippet"]["type"]=="textMessageEvent"
|
assert ret["items"][0]["snippet"]["type"] == "textMessageEvent"
|
||||||
|
|
||||||
|
|
||||||
def test_newsponcer(mocker):
|
def test_newsponcer(mocker):
|
||||||
'''api互換processorのテスト:メンバ新規登録'''
|
'''api互換processorのテスト:メンバ新規登録'''
|
||||||
@@ -59,22 +62,22 @@ def test_newsponcer(mocker):
|
|||||||
|
|
||||||
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
||||||
data = {
|
data = {
|
||||||
"video_id" : "",
|
"video_id": "",
|
||||||
"timeout" : 7,
|
"timeout": 7,
|
||||||
"chatdata" : chatdata
|
"chatdata": chatdata
|
||||||
}
|
}
|
||||||
ret = processor.process([data])
|
ret = processor.process([data])
|
||||||
|
|
||||||
assert ret["kind"]== "youtube#liveChatMessageListResponse"
|
assert ret["kind"] == "youtube#liveChatMessageListResponse"
|
||||||
assert ret["pollingIntervalMillis"]==data["timeout"]*1000
|
assert ret["pollingIntervalMillis"] == data["timeout"]*1000
|
||||||
assert ret.keys() == {
|
assert ret.keys() == {
|
||||||
"kind", "etag", "pageInfo", "nextPageToken","pollingIntervalMillis","items"
|
"kind", "etag", "pageInfo", "nextPageToken", "pollingIntervalMillis", "items"
|
||||||
}
|
}
|
||||||
assert ret["pageInfo"].keys() == {
|
assert ret["pageInfo"].keys() == {
|
||||||
"totalResults", "resultsPerPage"
|
"totalResults", "resultsPerPage"
|
||||||
}
|
}
|
||||||
assert ret["items"][0].keys() == {
|
assert ret["items"][0].keys() == {
|
||||||
"kind", "etag", "id", "snippet","authorDetails"
|
"kind", "etag", "id", "snippet", "authorDetails"
|
||||||
}
|
}
|
||||||
assert ret["items"][0]["snippet"].keys() == {
|
assert ret["items"][0]["snippet"].keys() == {
|
||||||
'type', 'liveChatId', 'authorChannelId', 'publishedAt', 'hasDisplayContent', 'displayMessage'
|
'type', 'liveChatId', 'authorChannelId', 'publishedAt', 'hasDisplayContent', 'displayMessage'
|
||||||
@@ -83,8 +86,44 @@ def test_newsponcer(mocker):
|
|||||||
assert ret["items"][0]["authorDetails"].keys() == {
|
assert ret["items"][0]["authorDetails"].keys() == {
|
||||||
'channelId', 'channelUrl', 'displayName', 'profileImageUrl', 'isVerified', 'isChatOwner', 'isChatSponsor', 'isChatModerator'
|
'channelId', 'channelUrl', 'displayName', 'profileImageUrl', 'isVerified', 'isChatOwner', 'isChatSponsor', 'isChatModerator'
|
||||||
}
|
}
|
||||||
assert "LCC." in ret["items"][0]["id"]
|
assert "LCC." in ret["items"][0]["id"]
|
||||||
assert ret["items"][0]["snippet"]["type"]=="newSponsorEvent"
|
assert ret["items"][0]["snippet"]["type"] == "newSponsorEvent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_newsponcer_rev(mocker):
|
||||||
|
'''api互換processorのテスト:メンバ新規登録'''
|
||||||
|
processor = CompatibleProcessor()
|
||||||
|
|
||||||
|
_json = _open_file("tests/testdata/compatible/newSponsor_rev.json")
|
||||||
|
|
||||||
|
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
||||||
|
data = {
|
||||||
|
"video_id": "",
|
||||||
|
"timeout": 7,
|
||||||
|
"chatdata": chatdata
|
||||||
|
}
|
||||||
|
ret = processor.process([data])
|
||||||
|
|
||||||
|
assert ret["kind"] == "youtube#liveChatMessageListResponse"
|
||||||
|
assert ret["pollingIntervalMillis"] == data["timeout"]*1000
|
||||||
|
assert ret.keys() == {
|
||||||
|
"kind", "etag", "pageInfo", "nextPageToken", "pollingIntervalMillis", "items"
|
||||||
|
}
|
||||||
|
assert ret["pageInfo"].keys() == {
|
||||||
|
"totalResults", "resultsPerPage"
|
||||||
|
}
|
||||||
|
assert ret["items"][0].keys() == {
|
||||||
|
"kind", "etag", "id", "snippet", "authorDetails"
|
||||||
|
}
|
||||||
|
assert ret["items"][0]["snippet"].keys() == {
|
||||||
|
'type', 'liveChatId', 'authorChannelId', 'publishedAt', 'hasDisplayContent', 'displayMessage'
|
||||||
|
|
||||||
|
}
|
||||||
|
assert ret["items"][0]["authorDetails"].keys() == {
|
||||||
|
'channelId', 'channelUrl', 'displayName', 'profileImageUrl', 'isVerified', 'isChatOwner', 'isChatSponsor', 'isChatModerator'
|
||||||
|
}
|
||||||
|
assert "LCC." in ret["items"][0]["id"]
|
||||||
|
assert ret["items"][0]["snippet"]["type"] == "newSponsorEvent"
|
||||||
|
|
||||||
|
|
||||||
def test_superchat(mocker):
|
def test_superchat(mocker):
|
||||||
@@ -95,16 +134,16 @@ def test_superchat(mocker):
|
|||||||
|
|
||||||
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
||||||
data = {
|
data = {
|
||||||
"video_id" : "",
|
"video_id": "",
|
||||||
"timeout" : 7,
|
"timeout": 7,
|
||||||
"chatdata" : chatdata
|
"chatdata": chatdata
|
||||||
}
|
}
|
||||||
ret = processor.process([data])
|
ret = processor.process([data])
|
||||||
|
|
||||||
assert ret["kind"]== "youtube#liveChatMessageListResponse"
|
assert ret["kind"] == "youtube#liveChatMessageListResponse"
|
||||||
assert ret["pollingIntervalMillis"]==data["timeout"]*1000
|
assert ret["pollingIntervalMillis"] == data["timeout"]*1000
|
||||||
assert ret.keys() == {
|
assert ret.keys() == {
|
||||||
"kind", "etag", "pageInfo", "nextPageToken","pollingIntervalMillis","items"
|
"kind", "etag", "pageInfo", "nextPageToken", "pollingIntervalMillis", "items"
|
||||||
}
|
}
|
||||||
assert ret["pageInfo"].keys() == {
|
assert ret["pageInfo"].keys() == {
|
||||||
"totalResults", "resultsPerPage"
|
"totalResults", "resultsPerPage"
|
||||||
@@ -121,8 +160,9 @@ def test_superchat(mocker):
|
|||||||
assert ret["items"][0]["snippet"]["superChatDetails"].keys() == {
|
assert ret["items"][0]["snippet"]["superChatDetails"].keys() == {
|
||||||
'amountMicros', 'currency', 'amountDisplayString', 'tier', 'backgroundColor'
|
'amountMicros', 'currency', 'amountDisplayString', 'tier', 'backgroundColor'
|
||||||
}
|
}
|
||||||
assert "LCC." in ret["items"][0]["id"]
|
assert "LCC." in ret["items"][0]["id"]
|
||||||
assert ret["items"][0]["snippet"]["type"]=="superChatEvent"
|
assert ret["items"][0]["snippet"]["type"] == "superChatEvent"
|
||||||
|
|
||||||
|
|
||||||
def test_unregistered_currency(mocker):
|
def test_unregistered_currency(mocker):
|
||||||
processor = CompatibleProcessor()
|
processor = CompatibleProcessor()
|
||||||
@@ -132,14 +172,14 @@ def test_unregistered_currency(mocker):
|
|||||||
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
_, chatdata = parser.parse(parser.get_contents(json.loads(_json)))
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"video_id" : "",
|
"video_id": "",
|
||||||
"timeout" : 7,
|
"timeout": 7,
|
||||||
"chatdata" : chatdata
|
"chatdata": chatdata
|
||||||
}
|
}
|
||||||
ret = processor.process([data])
|
ret = processor.process([data])
|
||||||
assert ret["items"][0]["snippet"]["superChatDetails"]["currency"] == "[UNREGISTERD]"
|
assert ret["items"][0]["snippet"]["superChatDetails"]["currency"] == "[UNREGISTERD]"
|
||||||
|
|
||||||
|
|
||||||
def _open_file(path):
|
def _open_file(path):
|
||||||
with open(path,mode ='r',encoding = 'utf-8') as f:
|
with open(path, mode='r', encoding='utf-8') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|||||||
77
tests/test_extract_asyncdl.py
Normal file
77
tests/test_extract_asyncdl.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from pytchat.tool.extract import parser
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from aioresponses import aioresponses
|
||||||
|
from concurrent.futures import CancelledError
|
||||||
|
from pytchat.tool.extract import asyncdl
|
||||||
|
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
def test_asyncdl_split():
|
||||||
|
|
||||||
|
ret = asyncdl._split(0,1000,1)
|
||||||
|
assert ret == [0]
|
||||||
|
|
||||||
|
ret = asyncdl._split(1000,1000,10)
|
||||||
|
assert ret == [1000]
|
||||||
|
|
||||||
|
ret = asyncdl._split(0,1000,5)
|
||||||
|
assert ret == [0,200,400,600,800]
|
||||||
|
|
||||||
|
ret = asyncdl._split(10.5, 700.3, 5)
|
||||||
|
assert ret == [10, 148, 286, 424, 562]
|
||||||
|
|
||||||
|
|
||||||
|
ret = asyncdl._split(0,500,5)
|
||||||
|
assert ret == [0,125,250,375]
|
||||||
|
|
||||||
|
ret = asyncdl._split(0,500,500)
|
||||||
|
assert ret == [0,125,250,375]
|
||||||
|
|
||||||
|
ret = asyncdl._split(-1,1000,5)
|
||||||
|
assert ret == [-1, 199, 399, 599, 799]
|
||||||
|
|
||||||
|
"""invalid argument order"""
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split(500,0,5)
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
"""invalid count"""
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split(0,500,-1)
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split(0,500,0)
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
"""invalid argument type"""
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split(0,5000,5.2)
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split(0,5000,"test")
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
try:
|
||||||
|
ret = asyncdl._split([0,1],5000,5)
|
||||||
|
assert False
|
||||||
|
except ValueError:
|
||||||
|
assert True
|
||||||
128
tests/test_extract_duplcheck.py
Normal file
128
tests/test_extract_duplcheck.py
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os, sys
|
||||||
|
import time
|
||||||
|
from pytchat.tool.extract import duplcheck
|
||||||
|
from pytchat.tool.extract import parser
|
||||||
|
from pytchat.tool.extract.block import Block
|
||||||
|
from pytchat.tool.extract.duplcheck import _dump
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_overlap():
|
||||||
|
"""
|
||||||
|
test overlap data
|
||||||
|
operation : [0] [2] [3] [4] -> last :align to end
|
||||||
|
[1] , [5] -> no change
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def load_chatdata(filename):
|
||||||
|
return parser.parse(
|
||||||
|
json.loads(_open_file("tests/testdata/extract_duplcheck/overlap/"+filename))
|
||||||
|
)[1]
|
||||||
|
|
||||||
|
blocks = (
|
||||||
|
Block(first = 0, last= 12771, end= 9890,chat_data = load_chatdata("dp0-0.json")),
|
||||||
|
Block(first = 9890, last= 15800, end= 20244,chat_data = load_chatdata("dp0-1.json")),
|
||||||
|
Block(first = 20244,last= 45146, end= 32476,chat_data = load_chatdata("dp0-2.json")),
|
||||||
|
Block(first = 32476,last= 50520, end= 41380,chat_data = load_chatdata("dp0-3.json")),
|
||||||
|
Block(first = 41380,last= 62875, end= 52568,chat_data = load_chatdata("dp0-4.json")),
|
||||||
|
Block(first = 52568,last= 62875, end= 54000,chat_data = load_chatdata("dp0-5.json"),is_last=True)
|
||||||
|
)
|
||||||
|
result = duplcheck.remove_overlap(blocks)
|
||||||
|
#dp0-0.json has an item whose offset time is 9890 (equal to block[0].end = block[1].first),
|
||||||
|
#but block[0].last must be aligned to the closest smaller value: 9779.
|
||||||
|
assert result[0].last == 9779
|
||||||
|
|
||||||
|
assert result[1].last == 15800
|
||||||
|
|
||||||
|
assert result[2].last == 32196
|
||||||
|
|
||||||
|
assert result[3].last == 41116
|
||||||
|
|
||||||
|
assert result[4].last == 52384
|
||||||
|
|
||||||
|
#the last block must always be added to the result.
|
||||||
|
assert result[5].last == 62875
|
||||||
|
|
||||||
|
def test_duplicate_head():
|
||||||
|
|
||||||
|
def load_chatdata(filename):
|
||||||
|
return parser.parse(
|
||||||
|
json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename))
|
||||||
|
)[1]
|
||||||
|
|
||||||
|
"""
|
||||||
|
test duplicate head data
|
||||||
|
operation : [0] , [1] -> discard [0]
|
||||||
|
[1] , [2] -> discard [1]
|
||||||
|
[2] , [3] -> append [2]
|
||||||
|
[3] , [4] -> discard [3]
|
||||||
|
[4] , [5] -> append [4]
|
||||||
|
append [5]
|
||||||
|
|
||||||
|
result : [2] , [4] , [5]
|
||||||
|
"""
|
||||||
|
|
||||||
|
#chat data offsets are ignored.
|
||||||
|
blocks = (
|
||||||
|
Block(first = 0, last = 2500, chat_data = load_chatdata("dp0-0.json")),
|
||||||
|
Block(first = 0, last =38771, chat_data = load_chatdata("dp0-1.json")),
|
||||||
|
Block(first = 0, last =45146, chat_data = load_chatdata("dp0-2.json")),
|
||||||
|
Block(first = 20244, last =60520, chat_data = load_chatdata("dp0-3.json")),
|
||||||
|
Block(first = 20244, last =62875, chat_data = load_chatdata("dp0-4.json")),
|
||||||
|
Block(first = 52568, last =62875, chat_data = load_chatdata("dp0-5.json"))
|
||||||
|
)
|
||||||
|
_dump(blocks)
|
||||||
|
result = duplcheck.remove_duplicate_head(blocks)
|
||||||
|
|
||||||
|
assert len(result) == 3
|
||||||
|
assert result[0].first == blocks[2].first
|
||||||
|
assert result[0].last == blocks[2].last
|
||||||
|
assert result[1].first == blocks[4].first
|
||||||
|
assert result[1].last == blocks[4].last
|
||||||
|
assert result[2].first == blocks[5].first
|
||||||
|
assert result[2].last == blocks[5].last
|
||||||
|
|
||||||
|
def test_duplicate_tail():
|
||||||
|
"""
|
||||||
|
test duplicate tail data
|
||||||
|
operation : append [0]
|
||||||
|
[0] , [1] -> discard [1]
|
||||||
|
[1] , [2] -> append [2]
|
||||||
|
[2] , [3] -> discard [3]
|
||||||
|
[3] , [4] -> append [4]
|
||||||
|
[4] , [5] -> discard [5]
|
||||||
|
|
||||||
|
result : [0] , [2] , [4]
|
||||||
|
"""
|
||||||
|
def load_chatdata(filename):
|
||||||
|
return parser.parse(
|
||||||
|
json.loads(_open_file("tests/testdata/extract_duplcheck/head/"+filename))
|
||||||
|
)[1]
|
||||||
|
#chat data offsets are ignored.
|
||||||
|
blocks = (
|
||||||
|
Block(first = 0,last = 2500, chat_data=load_chatdata("dp0-0.json")),
|
||||||
|
Block(first = 1500,last = 2500, chat_data=load_chatdata("dp0-1.json")),
|
||||||
|
Block(first = 10000,last = 45146, chat_data=load_chatdata("dp0-2.json")),
|
||||||
|
Block(first = 20244,last = 45146, chat_data=load_chatdata("dp0-3.json")),
|
||||||
|
Block(first = 20244,last = 62875, chat_data=load_chatdata("dp0-4.json")),
|
||||||
|
Block(first = 52568,last = 62875, chat_data=load_chatdata("dp0-5.json"))
|
||||||
|
)
|
||||||
|
|
||||||
|
result = duplcheck.remove_duplicate_tail(blocks)
|
||||||
|
_dump(result)
|
||||||
|
assert len(result) == 3
|
||||||
|
assert result[0].first == blocks[0].first
|
||||||
|
assert result[0].last == blocks[0].last
|
||||||
|
assert result[1].first == blocks[2].first
|
||||||
|
assert result[1].last == blocks[2].last
|
||||||
|
assert result[2].first == blocks[4].first
|
||||||
|
assert result[2].last == blocks[4].last
|
||||||
|
|
||||||
|
|
||||||
238
tests/test_extract_patch.py
Normal file
238
tests/test_extract_patch.py
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os, sys
|
||||||
|
import time
|
||||||
|
from aioresponses import aioresponses
|
||||||
|
from pytchat.tool.extract import duplcheck
|
||||||
|
from pytchat.tool.extract import parser
|
||||||
|
from pytchat.tool.extract.block import Block
|
||||||
|
from pytchat.tool.extract.patch import Patch, fill, split, set_patch
|
||||||
|
from pytchat.tool.extract.duplcheck import _dump
|
||||||
|
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def load_chatdata(filename):
|
||||||
|
return parser.parse(
|
||||||
|
json.loads(_open_file("tests/testdata/fetch_patch/"+filename))
|
||||||
|
)[1]
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_0():
|
||||||
|
"""
|
||||||
|
Normal case
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
@parent_block (# = already fetched)
|
||||||
|
|
||||||
|
first last end
|
||||||
|
|########----------------------------------------|
|
||||||
|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
first = last = 0 end (=parent_end)
|
||||||
|
| |
|
||||||
|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
|
||||||
|
first last end (after split)
|
||||||
|
|########------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
first last end
|
||||||
|
|###########---------------|
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=4000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert child.continuation == 'patch'
|
||||||
|
assert parent.last < child.first
|
||||||
|
assert parent.end == child.first
|
||||||
|
assert child.first < child.last
|
||||||
|
assert child.last < child.end
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == False
|
||||||
|
|
||||||
|
def test_split_1():
|
||||||
|
"""patch.first <= parent_block.last
|
||||||
|
|
||||||
|
While awaiting at run()->asyncdl._fetch()
|
||||||
|
fetching parent_block proceeds,
|
||||||
|
and parent.block.last exceeds patch.first.
|
||||||
|
|
||||||
|
In this case, fetched patch is all discarded,
|
||||||
|
and worker searches other processing block again.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
patch.first
|
||||||
|
first | last end
|
||||||
|
|####################|#####|---------------------|
|
||||||
|
^
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end (=parent_end)
|
||||||
|
| |
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end
|
||||||
|
|###########################|--------------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
.............. -> discard all data
|
||||||
|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=33000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert parent.last == 33000 #no change
|
||||||
|
assert parent.end == 60000 #no change
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == True #exclude during_split sequence
|
||||||
|
|
||||||
|
def test_split_2():
|
||||||
|
"""child_block.end < patch.last:
|
||||||
|
|
||||||
|
The case where the last offset of the patch exceeds child_block.end.
|
||||||
|
In this case, remove overlapped data of patch.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
@parent_block (# = already fetched)
|
||||||
|
first last end (before split)
|
||||||
|
|########------------------------------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end (=parent_end)
|
||||||
|
| |
|
||||||
|
|
||||||
|
continuation:succeed from patch
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-------- patch --------|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end (after split)
|
||||||
|
|########------------|
|
||||||
|
|
||||||
|
@child_block old patch.end
|
||||||
|
first last=end |
|
||||||
|
|#################|...... cut extra data.
|
||||||
|
^
|
||||||
|
continuation : None (extract complete)
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-------- patch --------|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=4000, end=33500, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=33500, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.last < child.first
|
||||||
|
assert parent.end == child.first
|
||||||
|
assert child.first < child.last
|
||||||
|
assert child.last < child.end
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == False
|
||||||
|
|
||||||
|
def test_split_none():
|
||||||
|
"""patch.last <= parent_block.last
|
||||||
|
|
||||||
|
While awaiting at run()->asyncdl._fetch()
|
||||||
|
fetching parent_block proceeds,
|
||||||
|
and parent_block.last exceeds patch.last.
|
||||||
|
|
||||||
|
In this case, fetched patch is all discarded,
|
||||||
|
and worker searches other processing block again.
|
||||||
|
|
||||||
|
~~~~~~ before ~~~~~~
|
||||||
|
|
||||||
|
patch.first
|
||||||
|
first | last end
|
||||||
|
|####################|###################|-------|
|
||||||
|
^
|
||||||
|
@child_block
|
||||||
|
first = last = 0 end (=parent_end)
|
||||||
|
| |
|
||||||
|
|
||||||
|
@fetched patch
|
||||||
|
|-- patch --|
|
||||||
|
patch.last < parent_block.last .
|
||||||
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
V
|
||||||
|
|
||||||
|
~~~~~~ after ~~~~~~
|
||||||
|
|
||||||
|
@parent_block
|
||||||
|
first last end (before split)
|
||||||
|
|########################################|-------|
|
||||||
|
|
||||||
|
@child_block
|
||||||
|
|
||||||
|
............ -> discard all data.
|
||||||
|
|
||||||
|
"""
|
||||||
|
parent = Block(first=0, last=40000, end=60000, continuation='parent', during_split=True)
|
||||||
|
child = Block(first=0, last=0, end=60000, continuation='mean', during_split=True)
|
||||||
|
patch = Patch(chats=load_chatdata('pt0-5.json'),
|
||||||
|
first=32500, last=34000, continuation='patch')
|
||||||
|
|
||||||
|
split(parent,child,patch)
|
||||||
|
|
||||||
|
assert parent.last == 40000 #no change
|
||||||
|
assert parent.end == 60000 #no change
|
||||||
|
assert child.continuation is None
|
||||||
|
assert parent.during_split == False
|
||||||
|
assert child.during_split == True #exclude during_split sequence
|
||||||
48
tests/test_jsonfile_archiver.py
Normal file
48
tests/test_jsonfile_archiver.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import json
|
||||||
|
from pytchat.processors.jsonfile_archiver import JsonfileArchiver
|
||||||
|
from unittest.mock import patch, mock_open
|
||||||
|
from tests.testdata.jsonfile_archiver.chat_component import chat_component
|
||||||
|
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def test_checkpath(mocker):
|
||||||
|
processor = JsonfileArchiver("path")
|
||||||
|
mocker.patch('os.path.exists').side_effect = exists_file
|
||||||
|
'''Test no duplicate file.'''
|
||||||
|
assert processor._checkpath("z:/other.txt") == "z:/other.txt"
|
||||||
|
|
||||||
|
'''Test duplicate filename.
|
||||||
|
The case where the first renamed name ('test.txt' -> 'test(0).txt')
|
||||||
|
is also duplicated.
|
||||||
|
'''
|
||||||
|
assert processor._checkpath("z:/test.txt") == "z:/test(1).txt"
|
||||||
|
|
||||||
|
'''Test a file with no extension (duplicate).'''
|
||||||
|
assert processor._checkpath("z:/test") == "z:/test(0)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_write():
|
||||||
|
'''Test read and write chatdata'''
|
||||||
|
mock = mock_open(read_data = "")
|
||||||
|
with patch('builtins.open',mock):
|
||||||
|
processor = JsonfileArchiver("path")
|
||||||
|
save_path = processor.process([chat_component])
|
||||||
|
fh = mock()
|
||||||
|
actuals = [args[0] for (args, kwargs) in fh.writelines.call_args_list]
|
||||||
|
'''write format is json dump string with 0x0A'''
|
||||||
|
to_be_written = [json.dumps(action, ensure_ascii=False)+'\n'
|
||||||
|
for action in chat_component["chatdata"]]
|
||||||
|
for i in range(len(actuals)):
|
||||||
|
assert actuals[i] == to_be_written[i]
|
||||||
|
assert save_path == {'save_path': 'path', 'total_lines': 7}
|
||||||
|
|
||||||
|
|
||||||
|
def exists_file(path):
|
||||||
|
if path == "z:/test.txt":
|
||||||
|
return True
|
||||||
|
if path == "z:/test(0).txt":
|
||||||
|
return True
|
||||||
|
if path == "z:/test":
|
||||||
|
return True
|
||||||
@@ -4,6 +4,6 @@ from pytchat.paramgen import liveparam
|
|||||||
def test_liveparam_0(mocker):
|
def test_liveparam_0(mocker):
|
||||||
_ts1= 1546268400
|
_ts1= 1546268400
|
||||||
param = liveparam._build("01234567890",
|
param = liveparam._build("01234567890",
|
||||||
*([_ts1*1000000 for i in range(5)]))
|
*([_ts1*1000000 for i in range(5)]), topchat_only=False)
|
||||||
test_param="0ofMyAPiARp8Q2c4S0RRb0xNREV5TXpRMU5qYzRPVEFhUTZxNXdiMEJQUW83YUhSMGNITTZMeTkzZDNjdWVXOTFkSFZpWlM1amIyMHZiR2wyWlY5amFHRjBQM1k5TURFeU16UTFOamM0T1RBbWFYTmZjRzl3YjNWMFBURWdBZyUzRCUzRCiAuNbVqsrfAjAAOABAAkorCAEQABgAIAAqDnN0YXRpY2NoZWNrc3VtOgBAAEoCCAFQgLjW1arK3wJYA1CAuNbVqsrfAliAuNbVqsrfAmgBggEECAEQAIgBAKABgLjW1arK3wI%3D"
|
test_param="0ofMyAPiARp8Q2c4S0RRb0xNREV5TXpRMU5qYzRPVEFhUTZxNXdiMEJQUW83YUhSMGNITTZMeTkzZDNjdWVXOTFkSFZpWlM1amIyMHZiR2wyWlY5amFHRjBQM1k5TURFeU16UTFOamM0T1RBbWFYTmZjRzl3YjNWMFBURWdBZyUzRCUzRCiAuNbVqsrfAjAAOABAAkorCAEQABgAIAAqDnN0YXRpY2NoZWNrc3VtOgBAAEoCCAFQgLjW1arK3wJYA1CAuNbVqsrfAliAuNbVqsrfAmgBggEECAEQAIgBAKABgLjW1arK3wI%3D"
|
||||||
assert test_param == param
|
assert test_param == param
|
||||||
@@ -7,7 +7,7 @@ from pytchat.exceptions import (
|
|||||||
NoLivechatRendererException,NoYtinitialdataException,
|
NoLivechatRendererException,NoYtinitialdataException,
|
||||||
ResponseContextError, NoContentsException)
|
ResponseContextError, NoContentsException)
|
||||||
|
|
||||||
from pytchat.processors.speed_calculator import SpeedCalculator
|
from pytchat.processors.speed.calculator import SpeedCalculator
|
||||||
|
|
||||||
parser = Parser(is_replay =False)
|
parser = Parser(is_replay =False)
|
||||||
|
|
||||||
|
|||||||
62
tests/test_videoinfo.py
Normal file
62
tests/test_videoinfo.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
from pytchat.tool.videoinfo import VideoInfo
|
||||||
|
from pytchat.exceptions import InvalidVideoIdException
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
def _open_file(path):
|
||||||
|
with open(path,mode ='r',encoding = 'utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
def _set_test_data(filepath, mocker):
|
||||||
|
_text = _open_file(filepath)
|
||||||
|
response_mock = mocker.Mock()
|
||||||
|
response_mock.status_code = 200
|
||||||
|
response_mock.text = _text
|
||||||
|
mocker.patch('requests.get').return_value = response_mock
|
||||||
|
|
||||||
|
def test_archived_page(mocker):
|
||||||
|
_set_test_data('tests/testdata/videoinfo/archived_page.txt', mocker)
|
||||||
|
info = VideoInfo('test_id')
|
||||||
|
actual_thumbnail_url = 'https://i.ytimg.com/vi/fzI9FNjXQ0o/hqdefault.jpg'
|
||||||
|
assert info.video_id == 'test_id'
|
||||||
|
assert info.get_channel_name() == 'GitHub'
|
||||||
|
assert info.get_thumbnail() == actual_thumbnail_url
|
||||||
|
assert info.get_title() == 'GitHub Arctic Code Vault'
|
||||||
|
assert info.get_channel_id() == 'UC7c3Kb6jYCRj4JOHHZTxKsQ'
|
||||||
|
assert info.get_duration() == 148
|
||||||
|
|
||||||
|
def test_live_page(mocker):
|
||||||
|
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
|
||||||
|
info = VideoInfo('test_id')
|
||||||
|
'''live page :duration = 0'''
|
||||||
|
assert info.get_duration() == 0
|
||||||
|
assert info.video_id == 'test_id'
|
||||||
|
assert info.get_channel_name() == 'BGM channel'
|
||||||
|
assert info.get_thumbnail() == \
|
||||||
|
'https://i.ytimg.com/vi/fEvM-OUbaKs/hqdefault_live.jpg'
|
||||||
|
assert info.get_title() == (
|
||||||
|
'Coffee Jazz Music - Chill Out Lounge Jazz Music Radio'
|
||||||
|
' - 24/7 Live Stream - Slow Jazz')
|
||||||
|
assert info.get_channel_id() == 'UCQINXHZqCU5i06HzxRkujfg'
|
||||||
|
|
||||||
|
def test_invalid_video_id(mocker):
|
||||||
|
'''Test the case where an invalid video_id is specified.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/invalid_video_id_page.txt', mocker)
|
||||||
|
try:
|
||||||
|
_ = VideoInfo('test_id')
|
||||||
|
assert False
|
||||||
|
except InvalidVideoIdException:
|
||||||
|
assert True
|
||||||
|
|
||||||
|
def test_no_info(mocker):
|
||||||
|
'''Test the case where the video page has a renderer, but no info.'''
|
||||||
|
_set_test_data(
|
||||||
|
'tests/testdata/videoinfo/no_info_page.txt', mocker)
|
||||||
|
info = VideoInfo('test_id')
|
||||||
|
assert info.video_id == 'test_id'
|
||||||
|
assert info.get_channel_name() is None
|
||||||
|
assert info.get_thumbnail() is None
|
||||||
|
assert info.get_title() is None
|
||||||
|
assert info.get_channel_id() is None
|
||||||
|
assert info.get_duration() is None
|
||||||
|
|
||||||
18
tests/testdata/calculator/replay_end.json
vendored
Normal file
18
tests/testdata/calculator/replay_end.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"response": {
|
||||||
|
"responseContext": {
|
||||||
|
"webResponseContextExtensionData": ""
|
||||||
|
},
|
||||||
|
"continuationContents": {
|
||||||
|
"liveChatContinuation": {
|
||||||
|
"continuations": [
|
||||||
|
{
|
||||||
|
"playerSeekContinuationData": {
|
||||||
|
"continuation": "___reload_continuation___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
3324
tests/testdata/calculator/superchat_0.json
vendored
Normal file
3324
tests/testdata/calculator/superchat_0.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
89
tests/testdata/calculator/text_only.json
vendored
Normal file
89
tests/testdata/calculator/text_only.json
vendored
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{
|
||||||
|
"response": {
|
||||||
|
"responseContext": {
|
||||||
|
"webResponseContextExtensionData": ""
|
||||||
|
},
|
||||||
|
"continuationContents": {
|
||||||
|
"liveChatContinuation": {
|
||||||
|
"continuations": [
|
||||||
|
{
|
||||||
|
"invalidationContinuationData": {
|
||||||
|
"invalidationId": {
|
||||||
|
"objectSource": 1000,
|
||||||
|
"objectId": "___objectId___",
|
||||||
|
"topic": "chat~00000000000~0000000",
|
||||||
|
"subscribeToGcmTopics": true,
|
||||||
|
"protoCreationTimestampMs": "1577804400000"
|
||||||
|
},
|
||||||
|
"timeoutMs": 10000,
|
||||||
|
"continuation": "___continuation___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"replayChatItemAction": {
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTextMessageRenderer": {
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "dummy_message"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"timestampText": {
|
||||||
|
"simpleText": "0:00"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clientId": "dummy_client_id"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"videoOffsetTimeMsec": "10000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
1823
tests/testdata/compatible/newSponsor_rev.json
vendored
Normal file
1823
tests/testdata/compatible/newSponsor_rev.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6128
tests/testdata/extract_duplcheck/head/dp0-0.json
vendored
Normal file
6128
tests/testdata/extract_duplcheck/head/dp0-0.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/head/dp0-1.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/head/dp0-1.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/head/dp0-2.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/head/dp0-2.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/head/dp0-3.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/head/dp0-3.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2529
tests/testdata/extract_duplcheck/head/dp0-4.json
vendored
Normal file
2529
tests/testdata/extract_duplcheck/head/dp0-4.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1431
tests/testdata/extract_duplcheck/head/dp0-5.json
vendored
Normal file
1431
tests/testdata/extract_duplcheck/head/dp0-5.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6128
tests/testdata/extract_duplcheck/overlap/dp0-0.json
vendored
Normal file
6128
tests/testdata/extract_duplcheck/overlap/dp0-0.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/overlap/dp0-1.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/overlap/dp0-1.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/overlap/dp0-2.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/overlap/dp0-2.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/extract_duplcheck/overlap/dp0-3.json
vendored
Normal file
3078
tests/testdata/extract_duplcheck/overlap/dp0-3.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2529
tests/testdata/extract_duplcheck/overlap/dp0-4.json
vendored
Normal file
2529
tests/testdata/extract_duplcheck/overlap/dp0-4.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1431
tests/testdata/extract_duplcheck/overlap/dp0-5.json
vendored
Normal file
1431
tests/testdata/extract_duplcheck/overlap/dp0-5.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/fetch_patch/pt0-0.json
vendored
Normal file
3078
tests/testdata/fetch_patch/pt0-0.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/fetch_patch/pt0-1.json
vendored
Normal file
3078
tests/testdata/fetch_patch/pt0-1.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/fetch_patch/pt0-3.json
vendored
Normal file
3078
tests/testdata/fetch_patch/pt0-3.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/fetch_patch/pt0-4.json
vendored
Normal file
3078
tests/testdata/fetch_patch/pt0-4.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3078
tests/testdata/fetch_patch/pt0-5.json
vendored
Normal file
3078
tests/testdata/fetch_patch/pt0-5.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
487
tests/testdata/jsonfile_archiver/chat_component.py
vendored
Normal file
487
tests/testdata/jsonfile_archiver/chat_component.py
vendored
Normal file
@@ -0,0 +1,487 @@
|
|||||||
|
chat_component = {
|
||||||
|
"video_id" : "video_id",
|
||||||
|
"timeout" : 10,
|
||||||
|
"chatdata": [
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTextMessageRenderer": {
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "This is normal message."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clientId": "dummy_client_id"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTextMessageRenderer": {
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "This is members's message"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorBadges": [
|
||||||
|
{
|
||||||
|
"liveChatAuthorBadgeRenderer": {
|
||||||
|
"customThumbnail": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/X=s32-c-k"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/X=s32-c-k"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tooltip": "メンバー(2 か月)",
|
||||||
|
"accessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "メンバー(2 か月)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clientId": "dummy_client_id"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatPlaceholderItemRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clientId": "dummy_client_id"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addLiveChatTickerItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTickerPaidMessageItemRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"amount": {
|
||||||
|
"simpleText": "¥10,000"
|
||||||
|
},
|
||||||
|
"amountTextColor": 4294967295,
|
||||||
|
"startBackgroundColor": 4293271831,
|
||||||
|
"endBackgroundColor": 4291821568,
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"durationSec": 3600,
|
||||||
|
"showItemEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"showLiveChatItemEndpoint": {
|
||||||
|
"renderer": {
|
||||||
|
"liveChatPaidMessageRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"purchaseAmountText": {
|
||||||
|
"simpleText": "¥10,000"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "This is superchat message."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"headerBackgroundColor": 4291821568,
|
||||||
|
"headerTextColor": 4294967295,
|
||||||
|
"bodyBackgroundColor": 4293271831,
|
||||||
|
"bodyTextColor": 4294967295,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"authorNameTextColor": 3019898879,
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"timestampColor": 2164260863,
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"fullDurationSec": 3600
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"durationSec": "3600"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatPaidMessageRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"purchaseAmountText": {
|
||||||
|
"simpleText": "¥10,800"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "This is superchat message."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"headerBackgroundColor": 4291821568,
|
||||||
|
"headerTextColor": 4294967295,
|
||||||
|
"bodyBackgroundColor": 4293271831,
|
||||||
|
"bodyTextColor": 4294967295,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"authorNameTextColor": 3019898879,
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"timestampColor": 2164260863,
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatPaidStickerRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"clickTrackingParams": "___clickTrackingParams___",
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"sticker": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "//lh3.googleusercontent.com/param_s=s40-rp",
|
||||||
|
"width": 40,
|
||||||
|
"height": 40
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "//lh3.googleusercontent.com/param_s=s80-rp",
|
||||||
|
"width": 80,
|
||||||
|
"height": 80
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"accessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "___sticker_label___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"moneyChipBackgroundColor": 4280191205,
|
||||||
|
"moneyChipTextColor": 4294967295,
|
||||||
|
"purchaseAmountText": {
|
||||||
|
"simpleText": "¥150"
|
||||||
|
},
|
||||||
|
"stickerDisplayWidth": 40,
|
||||||
|
"stickerDisplayHeight": 40,
|
||||||
|
"backgroundColor": 4279592384,
|
||||||
|
"authorNameTextColor": 3019898879,
|
||||||
|
"trackingParams": "___trackingParams___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"addLiveChatTickerItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTickerSponsorItemRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"detailText": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "メンバー"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"detailTextColor": 4294967295,
|
||||||
|
"startBackgroundColor": 4279213400,
|
||||||
|
"endBackgroundColor": 4278943811,
|
||||||
|
"sponsorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"durationSec": 300,
|
||||||
|
"showItemEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"showLiveChatItemEndpoint": {
|
||||||
|
"renderer": {
|
||||||
|
"liveChatMembershipItemRenderer": {
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"headerSubtext": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "メンバーシップ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": " へようこそ!"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorBadges": [
|
||||||
|
{
|
||||||
|
"liveChatAuthorBadgeRenderer": {
|
||||||
|
"customThumbnail": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/X=s32-c-k"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/X=s32-c-k"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tooltip": "新規メンバー",
|
||||||
|
"accessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "新規メンバー"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"fullDurationSec": 300
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"durationSec": "300"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
18
tests/testdata/jsonfile_archiver/replay_end.json
vendored
Normal file
18
tests/testdata/jsonfile_archiver/replay_end.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"response": {
|
||||||
|
"responseContext": {
|
||||||
|
"webResponseContextExtensionData": ""
|
||||||
|
},
|
||||||
|
"continuationContents": {
|
||||||
|
"liveChatContinuation": {
|
||||||
|
"continuations": [
|
||||||
|
{
|
||||||
|
"playerSeekContinuationData": {
|
||||||
|
"continuation": "___reload_continuation___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
89
tests/testdata/jsonfile_archiver/text_only.json
vendored
Normal file
89
tests/testdata/jsonfile_archiver/text_only.json
vendored
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{
|
||||||
|
"response": {
|
||||||
|
"responseContext": {
|
||||||
|
"webResponseContextExtensionData": ""
|
||||||
|
},
|
||||||
|
"continuationContents": {
|
||||||
|
"liveChatContinuation": {
|
||||||
|
"continuations": [
|
||||||
|
{
|
||||||
|
"invalidationContinuationData": {
|
||||||
|
"invalidationId": {
|
||||||
|
"objectSource": 1000,
|
||||||
|
"objectId": "___objectId___",
|
||||||
|
"topic": "chat~00000000000~0000000",
|
||||||
|
"subscribeToGcmTopics": true,
|
||||||
|
"protoCreationTimestampMs": "1577804400000"
|
||||||
|
},
|
||||||
|
"timeoutMs": 10000,
|
||||||
|
"continuation": "___continuation___"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"replayChatItemAction": {
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"addChatItemAction": {
|
||||||
|
"item": {
|
||||||
|
"liveChatTextMessageRenderer": {
|
||||||
|
"message": {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"text": "dummy_message"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"authorName": {
|
||||||
|
"simpleText": "author_name"
|
||||||
|
},
|
||||||
|
"authorPhoto": {
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 32,
|
||||||
|
"height": 32
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s32-x-x-xx-xx-xx-c0xffffff/photo.jpg",
|
||||||
|
"width": 64,
|
||||||
|
"height": 64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"contextMenuEndpoint": {
|
||||||
|
"commandMetadata": {
|
||||||
|
"webCommandMetadata": {
|
||||||
|
"ignoreNavigation": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"liveChatItemContextMenuEndpoint": {
|
||||||
|
"params": "___params___"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "dummy_id",
|
||||||
|
"timestampUsec": 0,
|
||||||
|
"authorExternalChannelId": "http://www.youtube.com/channel/author_channel_url",
|
||||||
|
"contextMenuAccessibility": {
|
||||||
|
"accessibilityData": {
|
||||||
|
"label": "コメントの操作"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"timestampText": {
|
||||||
|
"simpleText": "0:00"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clientId": "dummy_client_id"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"videoOffsetTimeMsec": "10000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
tests/testdata/videoinfo/archived_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/archived_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
14
tests/testdata/videoinfo/invalid_video_id_page.txt
vendored
Normal file
14
tests/testdata/videoinfo/invalid_video_id_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
15
tests/testdata/videoinfo/live_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/live_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
15
tests/testdata/videoinfo/no_info_page.txt
vendored
Normal file
15
tests/testdata/videoinfo/no_info_page.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user