Compare commits

..

68 Commits

Author SHA1 Message Date
taizan-hokouto
a5c7ba52c8 Merge branch 'hotfix/test' 2020-11-17 01:11:22 +09:00
taizan-hokouto
c37201fa03 Remove tests 2020-11-17 01:10:54 +09:00
taizan-hokouto
a474899268 Merge branch 'hotfix/tests' 2020-11-17 01:00:39 +09:00
taizan-hokouto
3f72eb0e00 Remove tests 2020-11-17 00:59:48 +09:00
taizan-hokouto
4652a56bc6 Merge branch 'hotfix/json' 2020-11-16 23:32:32 +09:00
taizan-hokouto
35218a66da Remove unnecessary import 2020-11-16 23:32:14 +09:00
taizan-hokouto
3432609588 Merge branch 'hotfix/json' 2020-11-16 23:29:50 +09:00
taizan-hokouto
48669e5f53 Fix tests 2020-11-16 23:29:24 +09:00
taizan-hokouto
f46df3ae42 Merge branch 'hotfix/json' 2020-11-16 23:17:36 +09:00
taizan-hokouto
96c028bd5d Increment version 2020-11-16 23:17:10 +09:00
taizan-hokouto
402dc15d7a Add tests 2020-11-16 23:11:51 +09:00
taizan-hokouto
6088ab6932 Fix jsonifying 2020-11-16 22:50:53 +09:00
taizan-hokouto
d98d34d8b3 Merge branch 'release/v0.4.5' 2020-11-16 01:50:49 +09:00
taizan-hokouto
24fa104e84 Increment version 2020-11-16 01:50:25 +09:00
taizan-hokouto
b4dad8c641 Merge branch 'feature/archiver' into develop 2020-11-16 01:49:34 +09:00
taizan-hokouto
3550cd6d91 Use temporary file to reduce memory usage 2020-11-16 01:37:31 +09:00
taizan-hokouto
2815b48e0e Return filename 2020-11-16 01:36:59 +09:00
taizan-hokouto
650e6ccb65 Remove unnecessary lines 2020-11-16 01:17:10 +09:00
taizan-hokouto
4a00a19a43 Change argument name 2020-11-16 01:16:09 +09:00
taizan-hokouto
b067eda7b6 Separate modules 2020-11-16 01:15:36 +09:00
taizan-hokouto
1b6bc86e76 Fix handling exception 2020-11-15 23:49:36 +09:00
taizan-hokouto
da2b513bcc Reduce delay 2020-11-15 19:52:00 +09:00
taizan-hokouto
6adae578ef Return generator instead of list 2020-11-15 19:50:53 +09:00
taizan-hokuto
128a834841 Merge branch 'hotfix/fix' 2020-11-15 16:54:24 +09:00
taizan-hokuto
086a14115f Merge tag 'fix' into develop 2020-11-15 16:54:24 +09:00
taizan-hokuto
6a392f3e1a Increment version 2020-11-15 16:53:36 +09:00
taizan-hokuto
93127a703c Revert 2020-11-15 16:53:03 +09:00
taizan-hokuto
e4ddbaf8ae Merge branch 'develop' 2020-11-15 16:39:07 +09:00
taizan-hokuto
ec75058605 Merge pull request #22 from wakamezake/github_actions
Add GitHub actions
2020-11-15 16:05:13 +09:00
taizan-hokouto
2b62e5dc5e Merge branch 'feature/pr_22' into develop 2020-11-15 15:59:52 +09:00
taizan-hokouto
8d7874096e Fix datetime tests 2020-11-15 15:59:28 +09:00
taizan-hokouto
99fcab83c8 Revert 2020-11-15 15:49:39 +09:00
wakamezake
3027bc0579 change timezone utc to jst 2020-11-15 15:39:16 +09:00
wakamezake
b1b70a4e76 delete cache 2020-11-15 15:39:16 +09:00
wakamezake
de41341d84 typo 2020-11-15 15:39:16 +09:00
wakamezake
a03d43b081 version up 2020-11-15 15:39:16 +09:00
wakamezake
f60aaade7f init 2020-11-15 15:39:16 +09:00
wakamezake
d3c34086ff change timezone utc to jst 2020-11-15 11:29:12 +09:00
wakamezake
6b58c9bcf5 delete cache 2020-11-15 10:50:14 +09:00
wakamezake
c2cba1651e Merge remote-tracking branch 'upstream/master' into github_actions 2020-11-15 10:40:00 +09:00
taizan-hokouto
ada3eb437d Merge branch 'hotfix/test_requirements' 2020-11-15 09:22:38 +09:00
taizan-hokouto
c1517d5be8 Merge branch 'master' into develop 2020-11-15 09:22:38 +09:00
taizan-hokouto
351034d1e6 Increment version 2020-11-15 09:21:58 +09:00
taizan-hokouto
c1db5a0c47 Update requirements.txt and requirements_test.txt 2020-11-15 09:18:01 +09:00
wakamezake
088dce712a typo 2020-11-14 18:08:41 +09:00
wakamezake
425e880b09 version up 2020-11-14 18:07:30 +09:00
wakamezake
62ec78abee init 2020-11-14 18:04:49 +09:00
taizan-hokouto
c84a32682c Merge branch 'hotfix/fix_prompt' 2020-11-08 12:31:52 +09:00
taizan-hokouto
74277b2afe Merge branch 'master' into develop 2020-11-08 12:31:52 +09:00
taizan-hokouto
cd20b74b2a Increment version 2020-11-08 12:31:16 +09:00
taizan-hokouto
06f54fd985 Remove unnecessary console output 2020-11-08 12:30:40 +09:00
taizan-hokouto
98b0470703 Merge tag 'emoji' into develop
v0.4.1
2020-11-06 19:58:45 +09:00
taizan-hokouto
bb4113b53c Merge branch 'hotfix/emoji' 2020-11-06 19:58:44 +09:00
taizan-hokouto
07f4382ed4 Increment version 2020-11-06 19:57:16 +09:00
taizan-hokouto
d40720616b Fix emoji encoding 2020-11-06 19:56:54 +09:00
taizan-hokouto
eebe7c79bd Merge branch 'master' into develop 2020-11-05 22:19:11 +09:00
taizan-hokouto
6c9e327e36 Merge branch 'hotfix/fix_readme' 2020-11-05 22:19:11 +09:00
taizan-hokouto
e9161c0ddd Update README 2020-11-05 22:18:54 +09:00
taizan-hokouto
c8b75dcf0e Merge branch 'master' into develop 2020-11-05 00:14:50 +09:00
taizan-hokouto
30cb7d7043 Merge branch 'hotfix/fix_readme' 2020-11-05 00:14:50 +09:00
taizan-hokouto
19d5b74beb Update README 2020-11-05 00:14:36 +09:00
taizan-hokouto
d5c3e45edc Merge branch 'master' into develop 2020-11-03 20:21:53 +09:00
taizan-hokouto
1d479fc15c Merge branch 'hotfix/fix_readme' 2020-11-03 20:21:52 +09:00
taizan-hokouto
20a20ddd08 Update README 2020-11-03 20:21:39 +09:00
taizan-hokouto
00c239f974 Merge branch 'master' into develop 2020-11-03 20:10:48 +09:00
taizan-hokouto
67b766b32c Merge branch 'hotfix/fix_readme' 2020-11-03 20:10:48 +09:00
taizan-hokouto
249aa0d147 Update README 2020-11-03 20:10:34 +09:00
taizan-hokouto
c708a588d8 Merge tag 'v0.4.0' into develop
v0.4.0
2020-11-03 18:20:10 +09:00
18 changed files with 245 additions and 195 deletions

27
.github/workflows/run_test.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
name: Run All UnitTest
on: [push, pull_request]
jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
matrix:
python-version: [3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt -r requirements_test.txt
- name: Test with pytest
run: |
export PYTHONPATH=./
pytest --verbose --color=yes

View File

@@ -24,12 +24,14 @@ pip install pytchat
### CLI ### CLI
One-liner command. + One-liner command.
+ Save chat data to html with embedded custom emojis.
+ Show chat stream (--echo option).
Save chat data to html with embedded custom emojis.
Show chat stream (--echo option).
```bash ```bash
$ pytchat -v https://www.youtube.com/watch?v=uIx8l2xlYVY -o "c:/temp/" $ pytchat -v uIx8l2xlYVY -o "c:/temp/"
# options: # options:
# -v : Video ID or URL that includes ID # -v : Video ID or URL that includes ID
# -o : output directory (default path: './') # -o : output directory (default path: './')
@@ -38,7 +40,7 @@ $ pytchat -v https://www.youtube.com/watch?v=uIx8l2xlYVY -o "c:/temp/"
``` ```
### On-demand mode with simple non-buffered object. ### Fetch chat data (see [wiki](https://github.com/taizan-hokuto/pytchat/wiki/PytchatCore))
```python ```python
import pytchat import pytchat
chat = pytchat.create(video_id="uIx8l2xlYVY") chat = pytchat.create(video_id="uIx8l2xlYVY")
@@ -47,7 +49,8 @@ while chat.is_alive():
print(f"{c.datetime} [{c.author.name}]- {c.message}") print(f"{c.datetime} [{c.author.name}]- {c.message}")
``` ```
### Output JSON format (feature of [DefaultProcessor](DefaultProcessor))
### Output JSON format string (feature of [DefaultProcessor](https://github.com/taizan-hokuto/pytchat/wiki/DefaultProcessor))
```python ```python
import pytchat import pytchat
import time import time
@@ -58,35 +61,21 @@ while chat.is_alive():
time.sleep(5) time.sleep(5)
''' '''
# Each chat item can also be output in JSON format. # Each chat item can also be output in JSON format.
for c in chat.get().sync_items(): for c in chat.get().items:
print(c.json()) print(c.json())
''' '''
``` ```
### other ### other
#### Fetch chat with buffer. + Fetch chat with a buffer ([LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat))
[LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat)
#### Asyncio Context + Use with asyncio ([LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync))
[LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync)
#### [YT API compatible chat processor]https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor) + YT API compatible chat processor ([CompatibleProcessor](https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor))
### [Extract archived chat data](https://github.com/taizan-hokuto/pytchat/wiki/Extractor) + Extract archived chat data ([Extractor](https://github.com/taizan-hokuto/pytchat/wiki/Extractor))
```python
from pytchat import HTMLArchiver, Extractor
video_id = "*******"
ex = Extractor(
video_id,
div=10,
processor=HTMLArchiver("c:/test.html")
)
ex.extract()
print("finished.")
```
## Structure of Default Processor ## Structure of Default Processor
Each item can be got with `sync_items()` function. Each item can be got with `sync_items()` function.

View File

@@ -1,8 +1,8 @@
""" """
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup. pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
""" """
__copyright__ = 'Copyright (C) 2019 taizan-hokuto' __copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.4.0' __version__ = '0.4.6'
__license__ = 'MIT' __license__ = 'MIT'
__author__ = 'taizan-hokuto' __author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com' __author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'

View File

@@ -1,31 +1,21 @@
import argparse import argparse
import asyncio
try: try:
from asyncio import CancelledError from asyncio import CancelledError
except ImportError: except ImportError:
from asyncio.futures import CancelledError from asyncio.futures import CancelledError
import os import os
import signal
from json.decoder import JSONDecodeError
from pathlib import Path
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
from .arguments import Arguments from .arguments import Arguments
from .echo import Echo from .echo import Echo
from .progressbar import ProgressBar from .. exceptions import InvalidVideoIdException
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
from .. import util
from .. import __version__ from .. import __version__
from .cli_extractor import CLIExtractor
''' '''
Most of CLI modules refer to Most of CLI modules refer to
Petter Kraabøl's Twitch-Chat-Downloader Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License) (MIT License)
''' '''
@@ -38,20 +28,19 @@ def main():
'If ID starts with a hyphen (-), enclose the ID in square brackets.') 'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str, parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./') help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true', parser.add_argument(f'--{Arguments.Name.DEBUG}', action='store_true',
help='Save error data when error occurs(".dat" file)') help='Debug mode. Stop when exceptions have occurred and save error data (".dat" file).')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true', parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version') help='Show version.')
parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true', parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true',
help='Show chats of specified video') help='Display chats of specified video.')
Arguments(parser.parse_args().__dict__) Arguments(parser.parse_args().__dict__)
if Arguments().print_version: if Arguments().print_version:
print(f'pytchat v{__version__} © 2019,2020 taizan-hokuto') print(f'pytchat v{__version__} © 2019, 2020 taizan-hokuto')
return return
# Extractor
if not Arguments().video_ids: if not Arguments().video_ids:
parser.print_help() parser.print_help()
return return
@@ -59,7 +48,7 @@ def main():
# Echo # Echo
if Arguments().echo: if Arguments().echo:
if len(Arguments().video_ids) > 1: if len(Arguments().video_ids) > 1:
print("You can specify only one video ID.") print("When using --echo option, only one video ID can be specified.")
return return
try: try:
Echo(Arguments().video_ids[0]).run() Echo(Arguments().video_ids[0]).run()
@@ -67,111 +56,16 @@ def main():
print("Invalid video id:", str(e)) print("Invalid video id:", str(e))
except Exception as e: except Exception as e:
print(type(e), str(e)) print(type(e), str(e))
if Arguments().debug:
raise
finally: finally:
return return
# Extractor
if not os.path.exists(Arguments().output): if not os.path.exists(Arguments().output):
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output)) print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
return return
try: try:
Runner().run() CLIExtractor().run()
except CancelledError as e: except CancelledError as e:
print(str(e)) print(str(e))
class Runner:
def run(self) -> None:
ex = None
pbar = None
for counter, video_id in enumerate(Arguments().video_ids):
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try:
video_id = extract_video_id(video_id)
separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html')
try:
info = VideoInfo(video_id)
except (PatternUnmatchError, JSONDecodeError) as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().save_error_data:
util.save(str(e.doc), "ERR", ".dat")
continue
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
continue
print(f"\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}\n"
f" output path: {path}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status_txt="Extracting")
ex = Extractor(video_id,
callback=pbar.disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: self.cancel(ex, pbar)))
data = ex.extract()
if data == []:
continue
pbar.reset("#", "=", total=len(data), status_txt="Rendering ")
processor = HTMLArchiver(path, callback=pbar.disp)
processor.process(
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
processor.finalize()
pbar.reset('#', '#', status_txt='Completed ')
pbar.close()
print()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except NoContents as e:
print(f"Abort:{str(e)}:[{video_id}]")
except (JSONDecodeError, PatternUnmatchError) as e:
print("{}:{}".format(e.msg, video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_", ".dat")
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
except Exception as e:
print(f"Abort:{str(type(e))} {str(e)[:80]}")
finally:
clear_tasks()
return
def cancel(self, ex=None, pbar=None) -> None:
'''Called when keyboard interrupted has occurred.
'''
print("\nKeyboard interrupted.\n")
if ex and pbar:
ex.cancel()
pbar.cancel()
def clear_tasks():
'''
Clear remained tasks.
Called when internal exception has occurred or
after each extraction process is completed.
'''
async def _shutdown():
tasks = [t for t in asyncio.all_tasks()
if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
try:
loop = asyncio.get_event_loop()
loop.run_until_complete(_shutdown())
except Exception as e:
print(e)

View File

@@ -18,7 +18,7 @@ class Arguments(metaclass=Singleton):
VERSION: str = 'version' VERSION: str = 'version'
OUTPUT: str = 'output_dir' OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id' VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data' DEBUG: bool = 'debug'
ECHO: bool = 'echo' ECHO: bool = 'echo'
def __init__(self, def __init__(self,
@@ -36,10 +36,10 @@ class Arguments(metaclass=Singleton):
self.print_version: bool = arguments[Arguments.Name.VERSION] self.print_version: bool = arguments[Arguments.Name.VERSION]
self.output: str = arguments[Arguments.Name.OUTPUT] self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = [] self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA] self.debug: bool = arguments[Arguments.Name.DEBUG]
self.echo: bool = arguments[Arguments.Name.ECHO] self.echo: bool = arguments[Arguments.Name.ECHO]
# Videos
# Videos
if arguments[Arguments.Name.VIDEO_IDS]: if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id self.video_ids = [video_id
for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')] for video_id in arguments[Arguments.Name.VIDEO_IDS].split(',')]

View File

@@ -0,0 +1,121 @@
import asyncio
import os
import signal
import traceback
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
from json.decoder import JSONDecodeError
from pathlib import Path
from .arguments import Arguments
from .progressbar import ProgressBar
from .. import util
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
class CLIExtractor:
def run(self) -> None:
ex = None
pbar = None
for counter, video_id in enumerate(Arguments().video_ids):
if len(Arguments().video_ids) > 1:
print(f"\n{'-' * 10} video:{counter + 1} of {len(Arguments().video_ids)} {'-' * 10}")
try:
video_id = extract_video_id(video_id)
separated_path = str(Path(Arguments().output)) + os.path.sep
path = util.checkpath(separated_path + video_id + '.html')
try:
info = VideoInfo(video_id)
except (PatternUnmatchError, JSONDecodeError) as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
if Arguments().debug:
util.save(str(e.doc), "ERR", ".dat")
continue
except Exception as e:
print("Cannot parse video information.:{} {}".format(video_id, type(e)))
continue
print(f"\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}\n"
f" output path: {path}")
duration = info.get_duration()
pbar = ProgressBar(total=(duration * 1000), status_txt="Extracting")
ex = Extractor(video_id,
callback=pbar.disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: self.cancel(ex, pbar)))
data = ex.extract()
if data == [] or data is None:
continue
pbar.reset("#", "=", total=1000, status_txt="Rendering ")
processor = HTMLArchiver(path, callback=pbar.disp)
processor.process(
[{'video_id': None,
'timeout': 1,
'chatdata': (action["replayChatItemAction"]["actions"][0] for action in data)}]
)
processor.finalize()
pbar.reset('#', '#', status_txt='Completed ')
pbar.close()
print()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except NoContents as e:
print(f"Abort:{str(e)}:[{video_id}]")
except (JSONDecodeError, PatternUnmatchError) as e:
print("{}:{}".format(e.msg, video_id))
if Arguments().debug:
filename = util.save(e.doc, "ERR_", ".dat")
traceback.print_exc()
print(f"Saved error data: {filename}")
except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
if Arguments().debug:
traceback.print_exc()
print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
except Exception as e:
print(f"Abort:{str(type(e))} {str(e)[:80]}")
if Arguments().debug:
traceback.print_exc()
finally:
clear_tasks()
return
def cancel(self, ex=None, pbar=None) -> None:
'''Called when keyboard interrupted has occurred.
'''
print("\nKeyboard interrupted.\n")
if ex and pbar:
ex.cancel()
pbar.cancel()
def clear_tasks():
'''
Clear remained tasks.
Called when internal exception has occurred or
after each extraction process is completed.
'''
async def _shutdown():
tasks = [t for t in asyncio.all_tasks()
if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
try:
loop = asyncio.get_event_loop()
loop.run_until_complete(_shutdown())
except Exception as e:
print(str(e))
if Arguments().debug:
traceback.print_exc()

View File

@@ -118,13 +118,10 @@ class PytchatCore:
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
self._raise_exception(e) self._raise_exception(e)
except (TypeError, json.JSONDecodeError) as e: except Exception as e:
self._logger.error(f"{traceback.format_exc(limit=-1)}") self._logger.error(f"{traceback.format_exc(limit=-1)}")
self._raise_exception(e) self._raise_exception(e)
self._logger.debug(f"[{self._video_id}]finished fetching chat.")
self._raise_exception(exceptions.ChatDataFinished)
def _get_contents(self, continuation, client, headers): def _get_contents(self, continuation, client, headers):
'''Get 'continuationContents' from livechat json. '''Get 'continuationContents' from livechat json.
If contents is None at first fetching, If contents is None at first fetching,
@@ -201,7 +198,7 @@ class PytchatCore:
raise self._exception_holder raise self._exception_holder
def _raise_exception(self, exception: Exception = None): def _raise_exception(self, exception: Exception = None):
self._is_alive = False self.terminate()
if self._hold_exception is False: if self._hold_exception is False:
raise exception raise exception
self._exception_holder = exception self._exception_holder = exception

View File

@@ -186,12 +186,12 @@ class LiveChatAsync:
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
raise raise
except (TypeError, json.JSONDecodeError): except Exception:
self._logger.error(f"{traceback.format_exc(limit = -1)}") self._logger.error(f"{traceback.format_exc(limit = -1)}")
raise raise
self._logger.debug(f"[{self._video_id}] finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
raise exceptions.ChatDataFinished
async def _check_pause(self, continuation): async def _check_pause(self, continuation):
if self._pauser.empty(): if self._pauser.empty():

View File

@@ -179,12 +179,12 @@ class LiveChat:
except exceptions.ChatParseException as e: except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}") self._logger.debug(f"[{self._video_id}]{str(e)}")
raise raise
except (TypeError, json.JSONDecodeError): except Exception:
self._logger.error(f"{traceback.format_exc(limit=-1)}") self._logger.error(f"{traceback.format_exc(limit=-1)}")
raise raise
self._logger.debug(f"[{self._video_id}] finished fetching chat.") self._logger.debug(f"[{self._video_id}] finished fetching chat.")
raise exceptions.ChatDataFinished
def _check_pause(self, continuation): def _check_pause(self, continuation):
if self._pauser.empty(): if self._pauser.empty():

View File

@@ -112,7 +112,7 @@ class Chatdata:
await asyncio.sleep(1 - stop_interval) await asyncio.sleep(1 - stop_interval)
def json(self) -> str: def json(self) -> str:
return json.dumps([vars(a) for a in self.items], ensure_ascii=False, cls=CustomEncoder) return ''.join(("[", ','.join((a.json() for a in self.items)), "]"))
class DefaultProcessor(ChatProcessor): class DefaultProcessor(ChatProcessor):
@@ -137,7 +137,7 @@ class DefaultProcessor(ChatProcessor):
if component is None: if component is None:
continue continue
timeout += component.get('timeout', 0) timeout += component.get('timeout', 0)
chatdata = component.get('chatdata') chatdata = component.get('chatdata') # if from Extractor, chatdata is generator.
if chatdata is None: if chatdata is None:
continue continue
for action in chatdata: for action in chatdata:
@@ -153,7 +153,7 @@ class DefaultProcessor(ChatProcessor):
chatlist.append(chat) chatlist.append(chat)
if self.first and chatlist: if self.first and chatlist:
self.abs_diff = time.time() - chatlist[0].timestamp / 1000 + 2 self.abs_diff = time.time() - chatlist[0].timestamp / 1000
self.first = False self.first = False
chatdata = Chatdata(chatlist, float(timeout), self.abs_diff) chatdata = Chatdata(chatlist, float(timeout), self.abs_diff)

View File

@@ -7,7 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
from .chat_processor import ChatProcessor from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor from .default.processor import DefaultProcessor
from ..exceptions import UnknownConnectionError from ..exceptions import UnknownConnectionError
import tempfile
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$") PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -51,11 +51,12 @@ class HTMLArchiver(ChatProcessor):
self.client = httpx.Client(http2=True) self.client = httpx.Client(http2=True)
self.save_path = self._checkpath(save_path) self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor() self.processor = DefaultProcessor()
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary. self.emoji_table = {} # dict for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
self.callback = callback self.callback = callback
self.executor = ThreadPoolExecutor(max_workers=10) self.executor = ThreadPoolExecutor(max_workers=10)
self.tmp_fp = tempfile.NamedTemporaryFile(mode="a", encoding="utf-8", delete=False)
self.tmp_filename = self.tmp_fp.name
self.counter = 0
def _checkpath(self, filepath): def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath)) splitter = os.path.splitext(os.path.basename(filepath))
@@ -85,9 +86,9 @@ class HTMLArchiver(ChatProcessor):
Count of total lines written to the file. Count of total lines written to the file.
""" """
if chat_components is None or len(chat_components) == 0: if chat_components is None or len(chat_components) == 0:
return return self.save_path ,self.counter
for c in self.processor.process(chat_components).items: for c in self.processor.process(chat_components).items:
self.body.extend( self.tmp_fp.write(
self._parse_html_line(( self._parse_html_line((
c.datetime, c.datetime,
c.elapsedTime, c.elapsedTime,
@@ -100,6 +101,8 @@ class HTMLArchiver(ChatProcessor):
) )
if self.callback: if self.callback:
self.callback(None, 1) self.callback(None, 1)
self.counter += 1
return self.save_path, self.counter
def _parse_html_line(self, raw_line): def _parse_html_line(self, raw_line):
return ''.join(('<tr>', return ''.join(('<tr>',
@@ -123,7 +126,6 @@ class HTMLArchiver(ChatProcessor):
resp = self.client.get(url, timeout=30) resp = self.client.get(url, timeout=30)
break break
except httpx.HTTPError as e: except httpx.HTTPError as e:
print("Network Error. retrying...")
err = e err = e
time.sleep(3) time.sleep(3)
else: else:
@@ -132,7 +134,7 @@ class HTMLArchiver(ChatProcessor):
return standard_b64encode(resp.content).decode() return standard_b64encode(resp.content).decode()
def _set_emoji_table(self, item: dict): def _set_emoji_table(self, item: dict):
emoji_id = item['id'] emoji_id = ''.join(('Z', item['id'])) if 48 <= ord(item['id'][0]) <= 57 else item['id']
if emoji_id not in self.emoji_table: if emoji_id not in self.emoji_table:
self.emoji_table.setdefault(emoji_id, self.executor.submit(self._encode_img, item['url'])) self.emoji_table.setdefault(emoji_id, self.executor.submit(self._encode_img, item['url']))
return emoji_id return emoji_id
@@ -150,9 +152,19 @@ class HTMLArchiver(ChatProcessor):
'</style>\n')) '</style>\n'))
def finalize(self): def finalize(self):
self.executor.shutdown() if self.tmp_fp:
self.header.extend([self._create_styles(), '</head>\n']) self.tmp_fp.flush()
self.body.extend(['</table>\n</body>\n</html>']) self.tmp_fp = None
with open(self.save_path, mode='a', encoding='utf-8') as f: with open(self.save_path, mode='w', encoding='utf-8') as outfile:
f.writelines(self.header) # write header
f.writelines(self.body) outfile.writelines((
HEADER_HTML, self._create_styles(), '</head>\n',
'<body>\n', '<table class="css">\n',
self._parse_table_header(fmt_headers)))
# write body
fp = open(self.tmp_filename, mode="r", encoding="utf-8")
for line in fp:
outfile.write(line)
outfile.write('</table>\n</body>\n</html>')
fp.close()
os.remove(self.tmp_filename)

View File

@@ -1,3 +1,4 @@
from typing import Generator
from . import asyncdl from . import asyncdl
from . import duplcheck from . import duplcheck
from .. videoinfo import VideoInfo from .. videoinfo import VideoInfo
@@ -60,11 +61,10 @@ class Extractor:
self.blocks = duplcheck.remove_duplicate_tail(self.blocks) self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
return self return self
def _combine(self): def _get_chatdata(self) -> Generator:
ret = []
for block in self.blocks: for block in self.blocks:
ret.extend(block.chat_data) for chatdata in block.chat_data:
return ret yield chatdata
def _execute_extract_operations(self): def _execute_extract_operations(self):
return ( return (
@@ -74,7 +74,7 @@ class Extractor:
._remove_overlap() ._remove_overlap()
._download_blocks() ._download_blocks()
._remove_duplicate_tail() ._remove_duplicate_tail()
._combine() ._get_chatdata()
) )
def extract(self): def extract(self):

View File

@@ -16,10 +16,11 @@ def extract(url):
json.dump(html.json(), f, ensure_ascii=False) json.dump(html.json(), f, ensure_ascii=False)
def save(data, filename, extention): def save(data, filename, extention) -> str:
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention, save_filename = filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
mode='w', encoding='utf-8') as f: with open(save_filename ,mode='w', encoding='utf-8') as f:
f.writelines(data) f.writelines(data)
return save_filename
def checkpath(filepath): def checkpath(filepath):

View File

@@ -1,4 +1,4 @@
httpx[http2]==0.14.1 httpx[http2]==0.16.1
protobuf==3.13.0 protobuf==3.14.0
pytz pytz
urllib3 urllib3

View File

@@ -1,4 +1,2 @@
mock pytest-mock==3.3.1
mocker pytest-httpx==0.10.0
pytest
pytest_httpx

View File

@@ -1,8 +1,17 @@
import json import json
from datetime import datetime
from pytchat.parser.live import Parser from pytchat.parser.live import Parser
from pytchat.processors.default.processor import DefaultProcessor from pytchat.processors.default.processor import DefaultProcessor
TEST_TIMETSTAMP = 1570678496000000
def get_local_datetime(timestamp):
dt = datetime.fromtimestamp(timestamp / 1000000)
return dt.strftime('%Y-%m-%d %H:%M:%S')
def test_textmessage(mocker): def test_textmessage(mocker):
'''text message''' '''text message'''
processor = DefaultProcessor() processor = DefaultProcessor()
@@ -20,7 +29,7 @@ def test_textmessage(mocker):
assert ret.id == "dummy_id" assert ret.id == "dummy_id"
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.author.name == "author_name" assert ret.author.name == "author_name"
assert ret.author.channelId == "author_channel_id" assert ret.author.channelId == "author_channel_id"
assert ret.author.channelUrl == "http://www.youtube.com/channel/author_channel_id" assert ret.author.channelUrl == "http://www.youtube.com/channel/author_channel_id"
@@ -51,7 +60,7 @@ def test_textmessage_replay_member(mocker):
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
assert ret.messageEx == ["dummy_message"] assert ret.messageEx == ["dummy_message"]
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "1:23:45" assert ret.elapsedTime == "1:23:45"
assert ret.author.name == "author_name" assert ret.author.name == "author_name"
assert ret.author.channelId == "author_channel_id" assert ret.author.channelId == "author_channel_id"
@@ -83,7 +92,7 @@ def test_superchat(mocker):
assert ret.message == "dummy_message" assert ret.message == "dummy_message"
assert ret.messageEx == ["dummy_message"] assert ret.messageEx == ["dummy_message"]
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "" assert ret.elapsedTime == ""
assert ret.amountValue == 800 assert ret.amountValue == 800
assert ret.amountString == "¥800" assert ret.amountString == "¥800"
@@ -125,7 +134,7 @@ def test_supersticker(mocker):
assert ret.message == "" assert ret.message == ""
assert ret.messageEx == [] assert ret.messageEx == []
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "" assert ret.elapsedTime == ""
assert ret.amountValue == 200 assert ret.amountValue == 200
assert ret.amountString == "¥200" assert ret.amountString == "¥200"
@@ -166,7 +175,7 @@ def test_sponsor(mocker):
assert ret.message == "新規メンバー" assert ret.message == "新規メンバー"
assert ret.messageEx == ["新規メンバー"] assert ret.messageEx == ["新規メンバー"]
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "" assert ret.elapsedTime == ""
assert ret.bgColor == 0 assert ret.bgColor == 0
assert ret.author.name == "author_name" assert ret.author.name == "author_name"
@@ -199,7 +208,7 @@ def test_sponsor_legacy(mocker):
assert ret.message == "新規メンバー / ようこそ、author_name" assert ret.message == "新規メンバー / ようこそ、author_name"
assert ret.messageEx == ["新規メンバー / ようこそ、author_name"] assert ret.messageEx == ["新規メンバー / ようこそ、author_name"]
assert ret.timestamp == 1570678496000 assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56" assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "" assert ret.elapsedTime == ""
assert ret.bgColor == 0 assert ret.bgColor == 0
assert ret.author.name == "author_name" assert ret.author.name == "author_name"

View File

@@ -0,0 +1 @@
{"author": {"badgeUrl": "", "type": "", "isVerified": false, "isChatOwner": false, "isChatSponsor": false, "isChatModerator": false, "channelId": "author_channel_id", "channelUrl": "http://www.youtube.com/channel/author_channel_id", "name": "author_name", "imageUrl": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s64-x-x-xx-xx-xx-c0xffffff/photo.jpg"}, "type": "superChat", "id": "dummy_id", "timestamp": 1570678496000, "elapsedTime": "", "datetime": "2019-10-10 12:34:56", "message": "dummy_message", "messageEx": ["dummy_message"], "amountValue": 800.0, "amountString": "¥800", "currency": "JPY", "bgColor": 4280150454, "colors": {"headerBackgroundColor": 4278239141, "headerTextColor": 4278190080, "bodyBackgroundColor": 4280150454, "bodyTextColor": 4278190080, "timestampColor": 2147483648, "authorNameTextColor": 2315255808}}

View File

@@ -0,0 +1 @@
[{"author": {"badgeUrl": "", "type": "", "isVerified": false, "isChatOwner": false, "isChatSponsor": false, "isChatModerator": false, "channelId": "author_channel_id", "channelUrl": "http://www.youtube.com/channel/author_channel_id", "name": "author_name", "imageUrl": "https://yt3.ggpht.com/------------/AAAAAAAAAAA/AAAAAAAAAAA/xxxxxxxxxxxx/s64-x-x-xx-xx-xx-c0xffffff/photo.jpg"}, "type": "superChat", "id": "dummy_id", "timestamp": 1570678496000, "elapsedTime": "", "datetime": "2019-10-10 12:34:56", "message": "dummy_message", "messageEx": ["dummy_message"], "amountValue": 800.0, "amountString": "¥800", "currency": "JPY", "bgColor": 4280150454, "colors": {"headerBackgroundColor": 4278239141, "headerTextColor": 4278190080, "bodyBackgroundColor": 4280150454, "bodyTextColor": 4278190080, "timestampColor": 2147483648, "authorNameTextColor": 2315255808}}]