Compare commits

..

181 Commits

Author SHA1 Message Date
taizan-hokouto
aaf9860bdc Merge branch 'release/v0.4.7' 2020-11-18 01:25:10 +09:00
taizan-hokouto
83ad4dcf1f Increment version 2020-11-18 01:24:37 +09:00
taizan-hokouto
765251b872 Merge branch 'feature/pipenv' into develop 2020-11-18 01:17:35 +09:00
taizan-hokouto
7ea88fead2 Modify requirements 2020-11-18 01:16:49 +09:00
taizan-hokouto
ea67e3e54e Add pipenv files 2020-11-18 01:16:01 +09:00
taizan-hokouto
a5c7ba52c8 Merge branch 'hotfix/test' 2020-11-17 01:11:22 +09:00
taizan-hokouto
7cf780ee87 Merge branch 'master' into develop 2020-11-17 01:11:22 +09:00
taizan-hokouto
c37201fa03 Remove tests 2020-11-17 01:10:54 +09:00
taizan-hokouto
6fcc1393de Merge branch 'master' into develop 2020-11-17 01:01:56 +09:00
taizan-hokouto
a474899268 Merge branch 'hotfix/tests' 2020-11-17 01:00:39 +09:00
taizan-hokouto
3f72eb0e00 Remove tests 2020-11-17 00:59:48 +09:00
taizan-hokouto
661d1e4b81 Fix tests 2020-11-17 00:54:32 +09:00
taizan-hokouto
4652a56bc6 Merge branch 'hotfix/json' 2020-11-16 23:32:32 +09:00
taizan-hokouto
966320cab5 Merge branch 'master' into develop 2020-11-16 23:32:32 +09:00
taizan-hokouto
35218a66da Remove unnecessary import 2020-11-16 23:32:14 +09:00
taizan-hokouto
3432609588 Merge branch 'hotfix/json' 2020-11-16 23:29:50 +09:00
taizan-hokouto
3ad6b7e845 Merge branch 'master' into develop 2020-11-16 23:29:50 +09:00
taizan-hokouto
48669e5f53 Fix tests 2020-11-16 23:29:24 +09:00
taizan-hokouto
7b0708ec46 Merge branch 'master' into develop 2020-11-16 23:17:37 +09:00
taizan-hokouto
f46df3ae42 Merge branch 'hotfix/json' 2020-11-16 23:17:36 +09:00
taizan-hokouto
96c028bd5d Increment version 2020-11-16 23:17:10 +09:00
taizan-hokouto
402dc15d7a Add tests 2020-11-16 23:11:51 +09:00
taizan-hokouto
6088ab6932 Fix jsonifying 2020-11-16 22:50:53 +09:00
taizan-hokouto
13812bdad3 Merge tag 'v0.4.5' into develop
v0.4.5
2020-11-16 01:50:50 +09:00
taizan-hokouto
d98d34d8b3 Merge branch 'release/v0.4.5' 2020-11-16 01:50:49 +09:00
taizan-hokouto
24fa104e84 Increment version 2020-11-16 01:50:25 +09:00
taizan-hokouto
b4dad8c641 Merge branch 'feature/archiver' into develop 2020-11-16 01:49:34 +09:00
taizan-hokouto
3550cd6d91 Use temporary file to reduce memory usage 2020-11-16 01:37:31 +09:00
taizan-hokouto
2815b48e0e Return filename 2020-11-16 01:36:59 +09:00
taizan-hokouto
650e6ccb65 Remove unnecessary lines 2020-11-16 01:17:10 +09:00
taizan-hokouto
4a00a19a43 Change argument name 2020-11-16 01:16:09 +09:00
taizan-hokouto
b067eda7b6 Separate modules 2020-11-16 01:15:36 +09:00
taizan-hokouto
1b6bc86e76 Fix handling exception 2020-11-15 23:49:36 +09:00
taizan-hokouto
da2b513bcc Reduce delay 2020-11-15 19:52:00 +09:00
taizan-hokouto
6adae578ef Return generator instead of list 2020-11-15 19:50:53 +09:00
taizan-hokuto
128a834841 Merge branch 'hotfix/fix' 2020-11-15 16:54:24 +09:00
taizan-hokuto
086a14115f Merge tag 'fix' into develop 2020-11-15 16:54:24 +09:00
taizan-hokuto
6a392f3e1a Increment version 2020-11-15 16:53:36 +09:00
taizan-hokuto
93127a703c Revert 2020-11-15 16:53:03 +09:00
taizan-hokuto
e4ddbaf8ae Merge branch 'develop' 2020-11-15 16:39:07 +09:00
taizan-hokuto
ec75058605 Merge pull request #22 from wakamezake/github_actions
Add GitHub actions
2020-11-15 16:05:13 +09:00
taizan-hokouto
2b62e5dc5e Merge branch 'feature/pr_22' into develop 2020-11-15 15:59:52 +09:00
taizan-hokouto
8d7874096e Fix datetime tests 2020-11-15 15:59:28 +09:00
taizan-hokouto
99fcab83c8 Revert 2020-11-15 15:49:39 +09:00
wakamezake
3027bc0579 change timezone utc to jst 2020-11-15 15:39:16 +09:00
wakamezake
b1b70a4e76 delete cache 2020-11-15 15:39:16 +09:00
wakamezake
de41341d84 typo 2020-11-15 15:39:16 +09:00
wakamezake
a03d43b081 version up 2020-11-15 15:39:16 +09:00
wakamezake
f60aaade7f init 2020-11-15 15:39:16 +09:00
wakamezake
d3c34086ff change timezone utc to jst 2020-11-15 11:29:12 +09:00
wakamezake
6b58c9bcf5 delete cache 2020-11-15 10:50:14 +09:00
wakamezake
c2cba1651e Merge remote-tracking branch 'upstream/master' into github_actions 2020-11-15 10:40:00 +09:00
taizan-hokouto
ada3eb437d Merge branch 'hotfix/test_requirements' 2020-11-15 09:22:38 +09:00
taizan-hokouto
c1517d5be8 Merge branch 'master' into develop 2020-11-15 09:22:38 +09:00
taizan-hokouto
351034d1e6 Increment version 2020-11-15 09:21:58 +09:00
taizan-hokouto
c1db5a0c47 Update requirements.txt and requirements_test.txt 2020-11-15 09:18:01 +09:00
wakamezake
088dce712a typo 2020-11-14 18:08:41 +09:00
wakamezake
425e880b09 version up 2020-11-14 18:07:30 +09:00
wakamezake
62ec78abee init 2020-11-14 18:04:49 +09:00
taizan-hokouto
c84a32682c Merge branch 'hotfix/fix_prompt' 2020-11-08 12:31:52 +09:00
taizan-hokouto
74277b2afe Merge branch 'master' into develop 2020-11-08 12:31:52 +09:00
taizan-hokouto
cd20b74b2a Increment version 2020-11-08 12:31:16 +09:00
taizan-hokouto
06f54fd985 Remove unnecessary console output 2020-11-08 12:30:40 +09:00
taizan-hokouto
98b0470703 Merge tag 'emoji' into develop
v0.4.1
2020-11-06 19:58:45 +09:00
taizan-hokouto
bb4113b53c Merge branch 'hotfix/emoji' 2020-11-06 19:58:44 +09:00
taizan-hokouto
07f4382ed4 Increment version 2020-11-06 19:57:16 +09:00
taizan-hokouto
d40720616b Fix emoji encoding 2020-11-06 19:56:54 +09:00
taizan-hokouto
eebe7c79bd Merge branch 'master' into develop 2020-11-05 22:19:11 +09:00
taizan-hokouto
6c9e327e36 Merge branch 'hotfix/fix_readme' 2020-11-05 22:19:11 +09:00
taizan-hokouto
e9161c0ddd Update README 2020-11-05 22:18:54 +09:00
taizan-hokouto
c8b75dcf0e Merge branch 'master' into develop 2020-11-05 00:14:50 +09:00
taizan-hokouto
30cb7d7043 Merge branch 'hotfix/fix_readme' 2020-11-05 00:14:50 +09:00
taizan-hokouto
19d5b74beb Update README 2020-11-05 00:14:36 +09:00
taizan-hokouto
d5c3e45edc Merge branch 'master' into develop 2020-11-03 20:21:53 +09:00
taizan-hokouto
1d479fc15c Merge branch 'hotfix/fix_readme' 2020-11-03 20:21:52 +09:00
taizan-hokouto
20a20ddd08 Update README 2020-11-03 20:21:39 +09:00
taizan-hokouto
00c239f974 Merge branch 'master' into develop 2020-11-03 20:10:48 +09:00
taizan-hokouto
67b766b32c Merge branch 'hotfix/fix_readme' 2020-11-03 20:10:48 +09:00
taizan-hokouto
249aa0d147 Update README 2020-11-03 20:10:34 +09:00
taizan-hokouto
c708a588d8 Merge tag 'v0.4.0' into develop
v0.4.0
2020-11-03 18:20:10 +09:00
taizan-hokouto
cb15df525f Merge branch 'release/v0.4.0' 2020-11-03 18:20:09 +09:00
taizan-hokouto
fcddc1516b Increment version 2020-11-03 18:19:43 +09:00
taizan-hokouto
a7732efd07 Merge branch 'feature/new_method' into develop 2020-11-03 18:18:43 +09:00
taizan-hokouto
0a2f4e8418 Update tests 2020-11-03 18:14:17 +09:00
taizan-hokouto
0c0ba0dfe6 Update README 2020-11-03 18:13:25 +09:00
taizan-hokouto
02827b174e Update tests 2020-11-03 18:13:09 +09:00
taizan-hokouto
81dee8a218 Fix comments 2020-11-03 16:51:30 +09:00
taizan-hokouto
5eb8bdbd0e Fix parsing info 2020-11-03 15:44:44 +09:00
taizan-hokouto
a37602e666 Fix keyboard interrupt process 2020-11-03 11:57:24 +09:00
taizan-hokouto
306b69198e Update README 2020-11-03 01:59:16 +09:00
taizan-hokouto
175e457052 Improve processing custom emojis 2020-11-02 22:44:09 +09:00
taizan-hokouto
5633a48618 Implement finalize() 2020-11-02 22:08:17 +09:00
taizan-hokouto
d7e608e8a1 Flake8 2020-11-02 00:26:46 +09:00
taizan-hokouto
213427fab3 Flake8 2020-11-02 00:26:27 +09:00
taizan-hokouto
3427c6fb69 Remove unnecessary line 2020-11-02 00:25:31 +09:00
taizan-hokouto
603c4470b7 Flake8 2020-11-02 00:25:05 +09:00
taizan-hokouto
37c8b7ae45 Use client instead of direct httpx 2020-11-01 21:58:41 +09:00
taizan-hokouto
d362152c77 Change module name 2020-11-01 19:29:09 +09:00
taizan-hokouto
8f5c3f312a Add --echo option to cli 2020-10-29 01:40:43 +09:00
taizan-hokouto
15a1d5c210 Implement exception holder 2020-10-29 01:39:07 +09:00
taizan-hokouto
499cf26fa8 Integrate httpx exceptions 2020-10-26 23:39:33 +09:00
taizan-hokouto
90596be880 Fix comment 2020-10-26 22:49:31 +09:00
taizan-hokouto
50d7b097e6 Remove unnecessary module 2020-10-26 22:34:43 +09:00
taizan-hokouto
b8d5ec5465 Remove unnecessary lines 2020-10-26 22:34:25 +09:00
taizan-hokouto
3200c5654f Change structure of default processor 2020-10-24 19:12:00 +09:00
taizan-hokouto
4905b1e4d8 Add simple core module 2020-10-24 18:07:54 +09:00
taizan-hokouto
16df63c14e Fix comments 2020-10-24 16:10:04 +09:00
taizan-hokouto
e950dff9d2 Merge tag 'fix_json' into develop
v0.3.2
2020-10-06 01:30:16 +09:00
taizan-hokouto
39d99ad4af Merge branch 'hotfix/fix_json' 2020-10-06 01:30:15 +09:00
taizan-hokouto
3675c91240 Increment version 2020-10-06 01:24:31 +09:00
taizan-hokouto
46258f625a Fix import module 2020-10-06 01:24:04 +09:00
taizan-hokouto
2cc161b589 Increment version 2020-10-06 01:20:25 +09:00
taizan-hokouto
115277e5e1 Fix handling internal error and keyboard interrupt 2020-10-06 01:19:45 +09:00
taizan-hokouto
ebf0e7c181 Fix handling json decode error and pattern unmatch 2020-10-05 21:38:51 +09:00
taizan-hokouto
b418898eef Merge tag 'filepath' into develop
v0.3.0
2020-10-04 11:33:59 +09:00
taizan-hokouto
3106b3e545 Merge branch 'hotfix/filepath' 2020-10-04 11:33:58 +09:00
taizan-hokouto
50816a661d Increment version 2020-10-04 11:30:07 +09:00
taizan-hokouto
6755bc8bb2 Make sure to pass fixed filepath to processor 2020-10-04 11:29:52 +09:00
taizan-hokouto
d62e7730ab Merge tag 'fix' into develop
v0.2.9
2020-10-04 10:32:54 +09:00
taizan-hokouto
26be989b9b Merge branch 'hotfix/fix' 2020-10-04 10:32:53 +09:00
taizan-hokouto
73ad0a1f44 Increment version 2020-10-04 10:22:34 +09:00
taizan-hokouto
66b185ebf7 Fix constructing filepath 2020-10-04 10:20:14 +09:00
taizan_hokuto
8bd82713e2 Merge tag 'fix' into develop
v0.2.7
2020-10-03 22:42:48 +09:00
taizan_hokuto
71650c39f7 Merge branch 'hotfix/fix' 2020-10-03 22:42:48 +09:00
taizan_hokuto
488445c73b Increment version 2020-10-03 22:41:53 +09:00
taizan_hokuto
075e811efe Delete unnecessary code 2020-10-03 22:41:12 +09:00
taizan_hokuto
9f9b83f185 Merge tag 'pattern' into develop
v0.2.6
2020-10-03 22:35:46 +09:00
taizan_hokuto
58d9bf7fdb Merge branch 'hotfix/pattern' 2020-10-03 22:35:46 +09:00
taizan_hokuto
b3e6275de7 Increment version 2020-10-03 22:35:22 +09:00
taizan_hokuto
748778f545 Fix pattern matching 2020-10-03 22:04:09 +09:00
taizan-hokuto
b2a68d0a74 Merge tag 'network' into develop
v0.2.5
2020-09-14 00:40:40 +09:00
taizan-hokuto
e29b3b8377 Merge branch 'hotfix/network' 2020-09-14 00:40:40 +09:00
taizan-hokuto
0859ed5fb1 Increment version 2020-09-14 00:29:21 +09:00
taizan-hokuto
a80d5ba080 Fix handling network error 2020-09-14 00:28:41 +09:00
taizan-hokuto
ac2924824e Merge tag 'memory' into develop
v0.2.4
2020-09-12 02:12:47 +09:00
taizan-hokuto
b7e6043a71 Merge branch 'hotfix/memory' 2020-09-12 02:12:46 +09:00
taizan-hokuto
820ba35013 Increment version 2020-09-12 02:02:07 +09:00
taizan-hokuto
ecd2d130bf Clear set each time the extraction changes 2020-09-12 01:57:55 +09:00
taizan-hokuto
1d410b6e68 Merge tag 'not_quit' into develop
v0.2.3
2020-09-12 00:57:49 +09:00
taizan-hokuto
f77a2c889b Merge branch 'hotfix/not_quit' 2020-09-12 00:57:48 +09:00
taizan-hokuto
47d5ab288f Increment version 2020-09-12 00:49:37 +09:00
taizan-hokuto
5f53fd24dd Format 2020-09-12 00:48:40 +09:00
taizan-hokuto
11a9d0e2d7 Fix a problem with extraction not completing 2020-09-12 00:42:30 +09:00
taizan-hokuto
6f18de46f7 Merge tag 'continue_error' into develop
v0.2.2
2020-09-11 00:21:07 +09:00
taizan-hokuto
480c9e15b8 Merge branch 'hotfix/continue_error' 2020-09-11 00:21:07 +09:00
taizan-hokuto
35aa7636f6 Increment version 2020-09-11 00:20:24 +09:00
taizan-hokuto
8fee67c2d4 Fix handling video info error 2020-09-11 00:18:09 +09:00
taizan-hokuto
74bfdd07e2 Merge tag 'v0.2.1' into develop
v0.2.1
2020-09-09 22:23:02 +09:00
taizan-hokuto
d3f1643a40 Merge branch 'release/v0.2.1' 2020-09-09 22:23:01 +09:00
taizan-hokuto
eb29f27493 Increment version 2020-09-09 22:22:31 +09:00
taizan-hokuto
8adf75ab83 Merge branch 'feature/pbar' into develop 2020-09-09 22:20:36 +09:00
taizan-hokuto
2e05803d75 Remove unnecessary option 2020-09-09 22:20:09 +09:00
taizan-hokuto
f16c0ee73a Fix progress bar line feed and remove pbar option 2020-09-09 22:19:10 +09:00
taizan-hokuto
a338f2b782 Merge tag 'v0.2.0' into develop
v0.2.0
2020-09-07 23:35:45 +09:00
taizan-hokuto
864ccddfd7 Merge branch 'release/v0.2.0' 2020-09-07 23:35:44 +09:00
taizan-hokuto
339df69e36 Increment version 2020-09-07 23:35:14 +09:00
taizan-hokuto
76a5b0cd18 Merge branch 'feature/new_item' into develop 2020-09-07 23:34:16 +09:00
taizan-hokuto
be0ab2431b Delete test for unuse module 2020-09-07 23:33:26 +09:00
taizan-hokuto
2edb60c592 Delete unuse modules 2020-09-07 23:31:32 +09:00
taizan-hokuto
2c6c3a1ca3 Delete old progress bar 2020-09-07 23:30:49 +09:00
taizan-hokuto
4be540793d Delete unnecessary blank lines 2020-09-07 23:30:30 +09:00
taizan-hokuto
08b86fe596 Make it possible to switch progress bar 2020-09-07 23:29:48 +09:00
taizan-hokuto
157f3b9952 Fix handling when missing id and type 2020-09-07 23:28:03 +09:00
taizan-hokuto
8f3ca2662a Merge tag 'pbar' into develop
v0.1.9
2020-09-06 18:58:34 +09:00
taizan-hokuto
c4b015861c Merge branch 'hotfix/pbar' 2020-09-06 18:58:33 +09:00
taizan-hokuto
3aa413d59e Increment version 2020-09-06 18:54:10 +09:00
taizan-hokuto
03ba285a16 Fix callback handling 2020-09-06 18:53:35 +09:00
taizan-hokuto
5fe0ee5aa8 Merge tag 'v0.1.8' into develop
v0.1.8
2020-09-06 18:27:58 +09:00
taizan-hokuto
4e829a25d4 Merge branch 'release/v0.1.8' 2020-09-06 18:27:57 +09:00
taizan-hokuto
15132a9bb8 Increment version 2020-09-06 18:27:08 +09:00
taizan-hokuto
64ace9dad6 Update progress bar 2020-09-06 18:25:16 +09:00
taizan-hokuto
9a2e96d3a0 Merge tag 'extract_vid' into develop
v0.1.7
2020-09-04 01:55:42 +09:00
taizan-hokuto
a3695a59b8 Merge branch 'hotfix/extract_vid' 2020-09-04 01:55:41 +09:00
taizan-hokuto
bc8655ed62 Increment version 2020-09-04 01:53:14 +09:00
taizan-hokuto
3bdc465740 Devide exception handling 2020-09-04 01:52:53 +09:00
taizan-hokuto
235d6b7212 Fix extract video info 2020-09-04 01:46:10 +09:00
taizan-hokuto
9f0754da57 Merge tag 'http2' into develop
v0.1.6
2020-09-03 21:27:48 +09:00
taizan-hokuto
306b0a4564 Merge branch 'hotfix/http2' 2020-09-03 21:27:48 +09:00
taizan-hokuto
1c49387f1a Increment version 2020-09-03 21:24:42 +09:00
taizan-hokuto
300d96e56c Fix requirements.txt 2020-09-03 21:24:21 +09:00
taizan-hokuto
0e301f48a8 Merge tag 'v0.1.5' into develop
v0.1.5
2020-09-03 20:16:56 +09:00
51 changed files with 1619 additions and 1177 deletions

27
.github/workflows/run_test.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
name: Run All UnitTest
on: [push, pull_request]
jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
matrix:
python-version: [3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt -r requirements_test.txt
- name: Test with pytest
run: |
export PYTHONPATH=./
pytest --verbose --color=yes

19
Pipfile Normal file
View File

@@ -0,0 +1,19 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
httpx = {extras = ["http2"], version = "*"}
protobuf = "==3.14.0"
pytz = "*"
urllib3 = "*"
[dev-packages]
pytest-mock = "*"
pytest-httpx = "*"
wheel = "*"
twine = "*"
[requires]
python_version = ">=3.6"

425
Pipfile.lock generated Normal file
View File

@@ -0,0 +1,425 @@
{
"_meta": {
"hash": {
"sha256": "aa731e6542f5f65756b98efe3e444ecffe78843c1041ab041cafdd2592c607db"
},
"pipfile-spec": 6,
"requires": {
"python_version": ">=3.6"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"certifi": {
"hashes": [
"sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
"sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
],
"version": "==2020.11.8"
},
"h11": {
"hashes": [
"sha256:3c6c61d69c6f13d41f1b80ab0322f1872702a3ba26e12aa864c928f6a43fbaab",
"sha256:ab6c335e1b6ef34b205d5ca3e228c9299cc7218b049819ec84a388c2525e5d87"
],
"version": "==0.11.0"
},
"h2": {
"hashes": [
"sha256:61e0f6601fa709f35cdb730863b4e5ec7ad449792add80d1410d4174ed139af5",
"sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14"
],
"version": "==3.2.0"
},
"hpack": {
"hashes": [
"sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89",
"sha256:8eec9c1f4bfae3408a3f30500261f7e6a65912dc138526ea054f9ad98892e9d2"
],
"version": "==3.0.0"
},
"httpcore": {
"hashes": [
"sha256:37660b117ba9055e8d5d19c29684d2204bbd3150020dde0ebd2dd2bcf18dfe50",
"sha256:3c5fcd97c52c3f6a1e4d939d776458e6177b5c238b825ed51d72840e582573b5"
],
"markers": "python_version >= '3.6'",
"version": "==0.12.1"
},
"httpx": {
"extras": [
"http2"
],
"hashes": [
"sha256:126424c279c842738805974687e0518a94c7ae8d140cd65b9c4f77ac46ffa537",
"sha256:9cffb8ba31fac6536f2c8cde30df859013f59e4bcc5b8d43901cb3654a8e0a5b"
],
"index": "pypi",
"version": "==0.16.1"
},
"hyperframe": {
"hashes": [
"sha256:5187962cb16dcc078f23cb5a4b110098d546c3f41ff2d4038a9896893bbd0b40",
"sha256:a9f5c17f2cc3c719b917c4f33ed1c61bd1f8dfac4b1bd23b7c80b3400971b41f"
],
"version": "==5.2.0"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
"version": "==2.10"
},
"protobuf": {
"hashes": [
"sha256:0e247612fadda953047f53301a7b0407cb0c3cb4ae25a6fde661597a04039b3c",
"sha256:0fc96785262042e4863b3f3b5c429d4636f10d90061e1840fce1baaf59b1a836",
"sha256:1c51fda1bbc9634246e7be6016d860be01747354ed7015ebe38acf4452f470d2",
"sha256:1d63eb389347293d8915fb47bee0951c7b5dab522a4a60118b9a18f33e21f8ce",
"sha256:22bcd2e284b3b1d969c12e84dc9b9a71701ec82d8ce975fdda19712e1cfd4e00",
"sha256:2a7e2fe101a7ace75e9327b9c946d247749e564a267b0515cf41dfe450b69bac",
"sha256:43b554b9e73a07ba84ed6cf25db0ff88b1e06be610b37656e292e3cbb5437472",
"sha256:4b74301b30513b1a7494d3055d95c714b560fbb630d8fb9956b6f27992c9f980",
"sha256:4e75105c9dfe13719b7293f75bd53033108f4ba03d44e71db0ec2a0e8401eafd",
"sha256:5b7a637212cc9b2bcf85dd828b1178d19efdf74dbfe1ddf8cd1b8e01fdaaa7f5",
"sha256:5e9806a43232a1fa0c9cf5da8dc06f6910d53e4390be1fa06f06454d888a9142",
"sha256:629b03fd3caae7f815b0c66b41273f6b1900a579e2ccb41ef4493a4f5fb84f3a",
"sha256:72230ed56f026dd664c21d73c5db73ebba50d924d7ba6b7c0d81a121e390406e",
"sha256:86a75477addde4918e9a1904e5c6af8d7b691f2a3f65587d73b16100fbe4c3b2",
"sha256:8971c421dbd7aad930c9bd2694122f332350b6ccb5202a8b7b06f3f1a5c41ed5",
"sha256:9616f0b65a30851e62f1713336c931fcd32c057202b7ff2cfbfca0fc7d5e3043",
"sha256:b0d5d35faeb07e22a1ddf8dce620860c8fe145426c02d1a0ae2688c6e8ede36d",
"sha256:ecc33531a213eee22ad60e0e2aaea6c8ba0021f0cce35dbf0ab03dee6e2a23a1"
],
"index": "pypi",
"version": "==3.14.0"
},
"pytz": {
"hashes": [
"sha256:3e6b7dd2d1e0a59084bcee14a17af60c5c562cdc16d828e8eba2e683d3a7e268",
"sha256:5c55e189b682d420be27c6995ba6edce0c0a77dd67bfbe2ae6607134d5851ffd"
],
"index": "pypi",
"version": "==2020.4"
},
"rfc3986": {
"extras": [
"idna2008"
],
"hashes": [
"sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d",
"sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50"
],
"version": "==1.4.0"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"sniffio": {
"hashes": [
"sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663",
"sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"
],
"markers": "python_version >= '3.5'",
"version": "==1.2.0"
},
"urllib3": {
"hashes": [
"sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
"sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
],
"index": "pypi",
"version": "==1.26.2"
}
},
"develop": {
"atomicwrites": {
"hashes": [
"sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197",
"sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"
],
"markers": "sys_platform == 'win32'",
"version": "==1.4.0"
},
"attrs": {
"hashes": [
"sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
"sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.3.0"
},
"bleach": {
"hashes": [
"sha256:52b5919b81842b1854196eaae5ca29679a2f2e378905c346d3ca8227c2c66080",
"sha256:9f8ccbeb6183c6e6cddea37592dfb0167485c1e3b13b3363bc325aa8bda3adbd"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==3.2.1"
},
"certifi": {
"hashes": [
"sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
"sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
],
"version": "==2020.11.8"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"colorama": {
"hashes": [
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",
"sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"
],
"markers": "sys_platform == 'win32'",
"version": "==0.4.4"
},
"docutils": {
"hashes": [
"sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af",
"sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==0.16"
},
"h11": {
"hashes": [
"sha256:3c6c61d69c6f13d41f1b80ab0322f1872702a3ba26e12aa864c928f6a43fbaab",
"sha256:ab6c335e1b6ef34b205d5ca3e228c9299cc7218b049819ec84a388c2525e5d87"
],
"version": "==0.11.0"
},
"httpcore": {
"hashes": [
"sha256:37660b117ba9055e8d5d19c29684d2204bbd3150020dde0ebd2dd2bcf18dfe50",
"sha256:3c5fcd97c52c3f6a1e4d939d776458e6177b5c238b825ed51d72840e582573b5"
],
"markers": "python_version >= '3.6'",
"version": "==0.12.1"
},
"httpx": {
"extras": [
"http2"
],
"hashes": [
"sha256:126424c279c842738805974687e0518a94c7ae8d140cd65b9c4f77ac46ffa537",
"sha256:9cffb8ba31fac6536f2c8cde30df859013f59e4bcc5b8d43901cb3654a8e0a5b"
],
"index": "pypi",
"version": "==0.16.1"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
"version": "==2.10"
},
"iniconfig": {
"hashes": [
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
],
"version": "==1.1.1"
},
"keyring": {
"hashes": [
"sha256:12de23258a95f3b13e5b167f7a641a878e91eab8ef16fafc077720a95e6115bb",
"sha256:207bd66f2a9881c835dad653da04e196c678bf104f8252141d2d3c4f31051579"
],
"markers": "python_version >= '3.6'",
"version": "==21.5.0"
},
"packaging": {
"hashes": [
"sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8",
"sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.4"
},
"pkginfo": {
"hashes": [
"sha256:a6a4ac943b496745cec21f14f021bbd869d5e9b4f6ec06918cffea5a2f4b9193",
"sha256:ce14d7296c673dc4c61c759a0b6c14bae34e34eb819c0017bb6ca5b7292c56e9"
],
"version": "==1.6.1"
},
"pluggy": {
"hashes": [
"sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.13.1"
},
"py": {
"hashes": [
"sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2",
"sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.9.0"
},
"pygments": {
"hashes": [
"sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0",
"sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773"
],
"markers": "python_version >= '3.5'",
"version": "==2.7.2"
},
"pyparsing": {
"hashes": [
"sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
"sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.4.7"
},
"pytest": {
"hashes": [
"sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe",
"sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e"
],
"markers": "python_version >= '3.5'",
"version": "==6.1.2"
},
"pytest-httpx": {
"hashes": [
"sha256:1cee873fdad622ca21169105691607db1411c9927aae9c2f44c02a893977c8f3",
"sha256:b996c8a4be900dfd37746d438cc9fc9321d37ffcacc1f5b7a9fc391daa208456"
],
"index": "pypi",
"version": "==0.10.0"
},
"pytest-mock": {
"hashes": [
"sha256:024e405ad382646318c4281948aadf6fe1135632bea9cc67366ea0c4098ef5f2",
"sha256:a4d6d37329e4a893e77d9ffa89e838dd2b45d5dc099984cf03c703ac8411bb82"
],
"index": "pypi",
"version": "==3.3.1"
},
"pywin32-ctypes": {
"hashes": [
"sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942",
"sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"
],
"markers": "sys_platform == 'win32'",
"version": "==0.2.0"
},
"readme-renderer": {
"hashes": [
"sha256:267854ac3b1530633c2394ead828afcd060fc273217c42ac36b6be9c42cd9a9d",
"sha256:6b7e5aa59210a40de72eb79931491eaf46fefca2952b9181268bd7c7c65c260a"
],
"version": "==28.0"
},
"requests": {
"hashes": [
"sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8",
"sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==2.25.0"
},
"requests-toolbelt": {
"hashes": [
"sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f",
"sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"
],
"version": "==0.9.1"
},
"rfc3986": {
"extras": [
"idna2008"
],
"hashes": [
"sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d",
"sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50"
],
"version": "==1.4.0"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"sniffio": {
"hashes": [
"sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663",
"sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"
],
"markers": "python_version >= '3.5'",
"version": "==1.2.0"
},
"toml": {
"hashes": [
"sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.10.2"
},
"tqdm": {
"hashes": [
"sha256:18d6a615aedd09ec8456d9524489dab330af4bd5c2a14a76eb3f9a0e14471afe",
"sha256:80d9d5165d678dbd027dd102dfb99f71bf05f333b61fb761dbba13b4ab719ead"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==4.52.0"
},
"twine": {
"hashes": [
"sha256:34352fd52ec3b9d29837e6072d5a2a7c6fe4290e97bba46bb8d478b5c598f7ab",
"sha256:ba9ff477b8d6de0c89dd450e70b2185da190514e91c42cc62f96850025c10472"
],
"index": "pypi",
"version": "==3.2.0"
},
"urllib3": {
"hashes": [
"sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
"sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
],
"index": "pypi",
"version": "==1.26.2"
},
"webencodings": {
"hashes": [
"sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
"sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
],
"version": "==0.5.1"
},
"wheel": {
"hashes": [
"sha256:497add53525d16c173c2c1c733b8f655510e909ea78cc0e29d374243544b77a2",
"sha256:99a22d87add3f634ff917310a3d87e499f19e663413a52eb9232c447aa646c9f"
],
"index": "pypi",
"version": "==0.35.1"
}
}
}

163
README.md
View File

@@ -5,9 +5,7 @@ pytchat is a python library for fetching youtube live chat.
## Description
pytchat is a Python library for fetching YouTube live chat
without using the YouTube API, Selenium or BeautifulSoup.
pytchatは、YouTubeチャットを閲覧するためのpythonライブラリです。
without using Selenium or BeautifulSoup.
Other features:
+ Customizable [chat data processors](https://github.com/taizan-hokuto/pytchat/wiki/ChatProcessor) including youtube api compatible one.
@@ -16,7 +14,7 @@ Other features:
instead of web scraping.
For more detailed information, see [wiki](https://github.com/taizan-hokuto/pytchat/wiki). <br>
より詳細な解説は[wiki](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)を参照してください。
[wiki (Japanese)](https://github.com/taizan-hokuto/pytchat/wiki/Home_jp)
## Install
```python
@@ -26,145 +24,61 @@ pip install pytchat
### CLI
One-liner command.
Save chat data to html, with embedded custom emojis.
+ One-liner command.
+ Save chat data to html with embedded custom emojis.
+ Show chat stream (--echo option).
```bash
$ pytchat -v https://www.youtube.com/watch?v=ZJ6Q4U_Vg6s -o "c:/temp/"
$ pytchat -v uIx8l2xlYVY -o "c:/temp/"
# options:
# -v : Video ID or URL that includes ID
# -o : output directory (default path: './')
# --echo : Show chats.
# saved filename is [video_id].html
```
### on-demand mode
### Fetch chat data (see [wiki](https://github.com/taizan-hokuto/pytchat/wiki/PytchatCore))
```python
from pytchat import LiveChat
livechat = LiveChat(video_id = "Zvp1pJpie4I")
# It is also possible to specify a URL that includes the video ID:
# livechat = LiveChat("https://www.youtube.com/watch?v=Zvp1pJpie4I")
while livechat.is_alive():
try:
chatdata = livechat.get()
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]- {c.message}")
chatdata.tick()
except KeyboardInterrupt:
livechat.terminate()
break
```
### callback mode
```python
from pytchat import LiveChat
import time
def main():
livechat = LiveChat(video_id = "Zvp1pJpie4I", callback = disp)
while livechat.is_alive():
#other background operation.
time.sleep(1)
livechat.terminate()
#callback function (automatically called)
def disp(chatdata):
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]- {c.message}")
chatdata.tick()
if __name__ == '__main__':
main()
```
### asyncio context:
```python
from pytchat import LiveChatAsync
from concurrent.futures import CancelledError
import asyncio
async def main():
livechat = LiveChatAsync("Zvp1pJpie4I", callback = func)
while livechat.is_alive():
#other background operation.
await asyncio.sleep(3)
#callback function is automatically called.
async def func(chatdata):
for c in chatdata.items:
print(f"{c.datetime} [{c.author.name}]-{c.message} {c.amountString}")
await chatdata.tick_async()
if __name__ == '__main__':
try:
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
except CancelledError:
pass
```
### youtube api compatible processor:
```python
from pytchat import LiveChat, CompatibleProcessor
import time
chat = LiveChat("Zvp1pJpie4I",
processor = CompatibleProcessor() )
import pytchat
chat = pytchat.create(video_id="uIx8l2xlYVY")
while chat.is_alive():
try:
data = chat.get()
polling = data['pollingIntervalMillis']/1000
for c in data['items']:
if c.get('snippet'):
print(f"[{c['authorDetails']['displayName']}]"
f"-{c['snippet']['displayMessage']}")
time.sleep(polling/len(data['items']))
except KeyboardInterrupt:
chat.terminate()
for c in chat.get().sync_items():
print(f"{c.datetime} [{c.author.name}]- {c.message}")
```
### replay:
If the specified video is not live,
pytchat automatically tries to fetch archived chat data.
### Output JSON format string (feature of [DefaultProcessor](https://github.com/taizan-hokuto/pytchat/wiki/DefaultProcessor))
```python
from pytchat import LiveChat
import pytchat
import time
def main():
#seektime (seconds): start position of chat.
chat = LiveChat("ojes5ULOqhc", seektime = 60*30)
print('Replay from 30:00')
try:
while chat.is_alive():
data = chat.get()
for c in data.items:
print(f"{c.elapsedTime} [{c.author.name}]-{c.message} {c.amountString}")
data.tick()
except KeyboardInterrupt:
chat.terminate()
if __name__ == '__main__':
main()
chat = pytchat.create(video_id="uIx8l2xlYVY")
while chat.is_alive():
print(chat.get().json())
time.sleep(5)
'''
# Each chat item can also be output in JSON format.
for c in chat.get().items:
print(c.json())
'''
```
### Extract archived chat data as [HTML](https://github.com/taizan-hokuto/pytchat/wiki/HTMLArchiver) or [tab separated values](https://github.com/taizan-hokuto/pytchat/wiki/TSVArchiver).
```python
from pytchat import HTMLArchiver, Extractor
video_id = "*******"
ex = Extractor(
video_id,
div=10,
processor=HTMLArchiver("c:/test.html")
)
ex.extract()
print("finished.")
```
### other
+ Fetch chat with a buffer ([LiveChat](https://github.com/taizan-hokuto/pytchat/wiki/LiveChat))
+ Use with asyncio ([LiveChatAsync](https://github.com/taizan-hokuto/pytchat/wiki/LiveChatAsync))
+ YT API compatible chat processor ([CompatibleProcessor](https://github.com/taizan-hokuto/pytchat/wiki/CompatibleProcessor))
+ Extract archived chat data ([Extractor](https://github.com/taizan-hokuto/pytchat/wiki/Extractor))
## Structure of Default Processor
Each item can be got with `items` function.
Each item can be retrieved with the `sync_items()` function.
<table>
<tr>
<th>name</th>
@@ -298,6 +212,9 @@ Most of source code of CLI refer to:
[PetterKraabol / Twitch-Chat-Downloader](https://github.com/PetterKraabol/Twitch-Chat-Downloader)
Progress bar in CLI is based on:
[vladignatyev/progress.py](https://gist.github.com/vladignatyev/06860ec2040cb497f0f3)
## Author

View File

@@ -1,14 +1,29 @@
"""
pytchat is a lightweight python library to browse youtube livechat without Selenium or BeautifulSoup.
"""
__copyright__ = 'Copyright (C) 2019 taizan-hokuto'
__version__ = '0.1.5'
__copyright__ = 'Copyright (C) 2019, 2020 taizan-hokuto'
__version__ = '0.4.7'
__license__ = 'MIT'
__author__ = 'taizan-hokuto'
__author_email__ = '55448286+taizan-hokuto@users.noreply.github.com'
__url__ = 'https://github.com/taizan-hokuto/pytchat'
__all__ = ["core_async","core_multithread","processors"]
from .exceptions import (
ChatParseException,
ResponseContextError,
NoContents,
NoContinuation,
IllegalFunctionCall,
InvalidVideoIdException,
UnknownConnectionError,
RetryExceedMaxCount,
ChatDataFinished,
ReceivedUnknownContinuation,
FailedExtractContinuation,
VideoInfoParseError,
PatternUnmatchError
)
from .api import (
cli,
@@ -26,7 +41,7 @@ from .api import (
SimpleDisplayProcessor,
SpeedCalculator,
SuperchatCalculator,
VideoInfo
VideoInfo,
create
)
# flake8: noqa

View File

@@ -1,5 +1,6 @@
from . import cli
from . import config
from .core import create
from .core_multithread.livechat import LiveChat
from .core_async.livechat import LiveChatAsync
from .processors.chat_processor import ChatProcessor
@@ -15,4 +16,24 @@ from .processors.superchat.calculator import SuperchatCalculator
from .tool.extract.extractor import Extractor
from .tool.videoinfo import VideoInfo
__all__ = [
cli,
config,
LiveChat,
LiveChatAsync,
ChatProcessor,
CompatibleProcessor,
DummyProcessor,
DefaultProcessor,
Extractor,
HTMLArchiver,
TSVArchiver,
JsonfileArchiver,
SimpleDisplayProcessor,
SpeedCalculator,
SuperchatCalculator,
VideoInfo,
create
]
# flake8: noqa

View File

@@ -1,25 +1,21 @@
import argparse
try:
from asyncio import CancelledError
except ImportError:
from asyncio.futures import CancelledError
import os
import signal
from json.decoder import JSONDecodeError
from pathlib import Path
from .arguments import Arguments
from .progressbar import ProgressBar
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
from .. import util
from .echo import Echo
from .. exceptions import InvalidVideoIdException
from .. import __version__
from .cli_extractor import CLIExtractor
'''
Most of CLI modules refer to
Petter Kraabøl's Twitch-Chat-Downloader
https://github.com/PetterKraabol/Twitch-Chat-Downloader
(MIT License)
'''
@@ -32,69 +28,44 @@ def main():
'If ID starts with a hyphen (-), enclose the ID in square brackets.')
parser.add_argument('-o', f'--{Arguments.Name.OUTPUT}', type=str,
help='Output directory (end with "/"). default="./"', default='./')
parser.add_argument(f'--{Arguments.Name.DEBUG}', action='store_true',
help='Debug mode. Stop when exceptions have occurred and save error data (".dat" file).')
parser.add_argument(f'--{Arguments.Name.VERSION}', action='store_true',
help='Show version')
parser.add_argument(f'--{Arguments.Name.SAVE_ERROR_DATA}', action='store_true',
help='Save error data when error occurs(".dat" file)')
help='Show version.')
parser.add_argument(f'--{Arguments.Name.ECHO}', action='store_true',
help='Display chats of specified video.')
Arguments(parser.parse_args().__dict__)
if Arguments().print_version:
print(f'pytchat v{__version__} © 2019 taizan-hokuto')
print(f'pytchat v{__version__} © 2019, 2020 taizan-hokuto')
return
# Extractor
if not Arguments().video_ids:
parser.print_help()
return
for video_id in Arguments().video_ids:
if '[' in video_id:
video_id = video_id.replace('[', '').replace(']', '')
# Echo
if Arguments().echo:
if len(Arguments().video_ids) > 1:
print("When using --echo option, only one video ID can be specified.")
return
try:
video_id = extract_video_id(video_id)
if os.path.exists(Arguments().output):
path = Path(Arguments().output + video_id + '.html')
else:
raise FileNotFoundError
info = VideoInfo(video_id)
print(f"Extracting...\n"
f" video_id: {video_id}\n"
f" channel: {info.get_channel_name()}\n"
f" title: {info.get_title()}")
Echo(Arguments().video_ids[0]).run()
except InvalidVideoIdException as e:
print("Invalid video id:", str(e))
except Exception as e:
print(type(e), str(e))
if Arguments().debug:
raise
finally:
return
print(f" output path: {path.resolve()}")
duration = info.get_duration()
pbar = ProgressBar(duration)
ex = Extractor(video_id,
processor=HTMLArchiver(Arguments().output + video_id + '.html'),
callback=pbar._disp,
div=10)
signal.signal(signal.SIGINT, (lambda a, b: cancel(ex, pbar)))
ex.extract()
pbar.close()
if pbar.is_cancelled():
print("\nThe extraction process has been discontinued.\n")
return
print("\nThe extraction process has been completed.\n")
except InvalidVideoIdException:
print("Invalid Video ID or URL:", video_id)
except (TypeError, NoContents) as e:
print(e.with_traceback())
except FileNotFoundError:
print("The specified directory does not exist.:{}".format(Arguments().output))
except JSONDecodeError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_JSON_DECODE", ".dat")
except PatternUnmatchError as e:
print(e.msg)
print("Cannot parse video information.:{}".format(video_id))
if Arguments().save_error_data:
util.save(e.doc, "ERR_PATTERN_UNMATCH", ".dat")
return
def cancel(ex: Extractor, pbar: ProgressBar):
ex.cancel()
pbar.cancel()
# Extractor
if not os.path.exists(Arguments().output):
print("\nThe specified directory does not exist.:{}\n".format(Arguments().output))
return
try:
CLIExtractor().run()
except CancelledError as e:
print(str(e))

View File

@@ -18,7 +18,8 @@ class Arguments(metaclass=Singleton):
VERSION: str = 'version'
OUTPUT: str = 'output_dir'
VIDEO_IDS: str = 'video_id'
SAVE_ERROR_DATA: bool = 'save_error_data'
DEBUG: bool = 'debug'
ECHO: bool = 'echo'
def __init__(self,
arguments: Optional[Dict[str, Union[str, bool, int]]] = None):
@@ -35,7 +36,9 @@ class Arguments(metaclass=Singleton):
self.print_version: bool = arguments[Arguments.Name.VERSION]
self.output: str = arguments[Arguments.Name.OUTPUT]
self.video_ids: List[int] = []
self.save_error_data: bool = arguments[Arguments.Name.SAVE_ERROR_DATA]
self.debug: bool = arguments[Arguments.Name.DEBUG]
self.echo: bool = arguments[Arguments.Name.ECHO]
# Videos
if arguments[Arguments.Name.VIDEO_IDS]:
self.video_ids = [video_id

View File

@@ -0,0 +1,121 @@
import asyncio
import os
import signal
import traceback
from httpcore import ReadTimeout as HCReadTimeout, NetworkError as HCNetworkError
from json.decoder import JSONDecodeError
from pathlib import Path
from .arguments import Arguments
from .progressbar import ProgressBar
from .. import util
from .. exceptions import InvalidVideoIdException, NoContents, PatternUnmatchError, UnknownConnectionError
from .. processors.html_archiver import HTMLArchiver
from .. tool.extract.extractor import Extractor
from .. tool.videoinfo import VideoInfo
from .. util.extract_video_id import extract_video_id
class CLIExtractor:
    '''
    Command-line driver that extracts the chat data of each requested
    video and renders it to an HTML file.
    '''

    def run(self) -> None:
        '''
        Process every video ID held by Arguments(), one after another.

        For each video: resolve the ID, fetch the video information,
        extract the chat data while a progress bar is displayed, then
        render the data with HTMLArchiver. Failures are reported on
        stdout and the loop moves on to the next video.
        clear_tasks() is called after every video, whatever the outcome.
        '''
        ex = None
        pbar = None
        video_ids = Arguments().video_ids
        for index, video_id in enumerate(video_ids, start=1):
            if len(video_ids) > 1:
                print(f"\n{'-' * 10} video:{index} of {len(video_ids)} {'-' * 10}")

            try:
                video_id = extract_video_id(video_id)
                output_dir = str(Path(Arguments().output)) + os.path.sep
                path = util.checkpath(output_dir + video_id + '.html')

                try:
                    info = VideoInfo(video_id)
                except Exception as e:
                    print("Cannot parse video information.:{} {}".format(video_id, type(e)))
                    # Only parse errors carry a `doc` payload worth saving.
                    if isinstance(e, (PatternUnmatchError, JSONDecodeError)) and Arguments().debug:
                        util.save(str(e.doc), "ERR", ".dat")
                    continue

                print(f"\n"
                      f" video_id: {video_id}\n"
                      f" channel: {info.get_channel_name()}\n"
                      f" title: {info.get_title()}\n"
                      f" output path: {path}")

                # Extraction phase.
                pbar = ProgressBar(total=(info.get_duration() * 1000), status_txt="Extracting")
                ex = Extractor(video_id, callback=pbar.disp, div=10)
                # The lambda reads `ex` and `pbar` when the signal fires,
                # not when the handler is registered.
                signal.signal(signal.SIGINT, (lambda a, b: self.cancel(ex, pbar)))
                data = ex.extract()
                if data is None or data == []:
                    continue

                # Rendering phase.
                pbar.reset("#", "=", total=1000, status_txt="Rendering ")
                chat_actions = (action["replayChatItemAction"]["actions"][0] for action in data)
                component = {'video_id': None, 'timeout': 1, 'chatdata': chat_actions}
                processor = HTMLArchiver(path, callback=pbar.disp)
                processor.process([component])
                processor.finalize()

                pbar.reset('#', '#', status_txt='Completed ')
                pbar.close()
                print()
                if pbar.is_cancelled():
                    print("\nThe extraction process has been discontinued.\n")
            except InvalidVideoIdException:
                print("Invalid Video ID or URL:", video_id)
            except NoContents as e:
                print(f"Abort:{str(e)}:[{video_id}]")
            except (JSONDecodeError, PatternUnmatchError) as e:
                print("{}:{}".format(e.msg, video_id))
                if Arguments().debug:
                    filename = util.save(e.doc, "ERR_", ".dat")
                    traceback.print_exc()
                    print(f"Saved error data: {filename}")
            except (UnknownConnectionError, HCNetworkError, HCReadTimeout) as e:
                if Arguments().debug:
                    traceback.print_exc()
                print(f"An unknown network error occurred during the processing of [{video_id}]. : " + str(e))
            except Exception as e:
                print(f"Abort:{str(type(e))} {str(e)[:80]}")
                if Arguments().debug:
                    traceback.print_exc()
            finally:
                clear_tasks()

    def cancel(self, ex=None, pbar=None) -> None:
        '''
        SIGINT handler body: announce the interruption and, when both an
        extractor and a progress bar are given, cancel them.
        '''
        print("\nKeyboard interrupted.\n")
        if not (ex and pbar):
            return
        ex.cancel()
        pbar.cancel()
def clear_tasks():
    '''
    Request cancellation of every task still registered on the event loop.

    Called when an internal exception has occurred or after each
    extraction process is completed. Any error raised while doing so is
    printed (with a traceback in debug mode) instead of propagating.
    '''
    async def _shutdown():
        # The coroutine running this code must not cancel itself.
        me = asyncio.current_task()
        for pending in asyncio.all_tasks():
            if pending is not me:
                pending.cancel()

    try:
        asyncio.get_event_loop().run_until_complete(_shutdown())
    except Exception as e:
        print(str(e))
        if Arguments().debug:
            traceback.print_exc()

22
pytchat/cli/echo.py Normal file
View File

@@ -0,0 +1,22 @@
import pytchat
from ..exceptions import ChatDataFinished, NoContents
from ..util.extract_video_id import extract_video_id
class Echo:
    '''
    Prints the chat of one video to stdout (CLI --echo option).
    '''

    def __init__(self, video_id):
        # Accepts whatever extract_video_id() accepts and keeps the result.
        self.video_id = extract_video_id(video_id)

    def run(self):
        '''
        Print chat items until the chat object reports it is no longer
        alive, then report why fetching stopped.
        '''
        chat = pytchat.create(self.video_id)
        while chat.is_alive():
            for c in chat.get().sync_items():
                print(f"{c.datetime} [{c.author.name}] {c.message} {c.amountString}")

        # Surface the exception held by the chat object, if any.
        try:
            chat.raise_for_status()
        except (ChatDataFinished, NoContents):
            print("Chat finished.")
        except Exception as e:
            print(type(e), str(e))

View File

@@ -1,38 +1,51 @@
'''
This code for this progress bar is based on
This code is based on
vladignatyev/progress.py
https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
(MIT License)
'''
import shutil
import sys
class ProgressBar:
def __init__(self, duration):
self._duration = duration
self._count = 0
def __init__(self, total, status_txt):
self._bar_len = 60
self._cancelled = False
self.reset(total=total, status_txt=status_txt)
def reset(self, symbol_done="=", symbol_space=" ", total=100, status_txt=''):
self._console_width = shutil.get_terminal_size(fallback=(80, 24)).columns
self._symbol_done = symbol_done
self._symbol_space = symbol_space
self._total = total
self._status_txt = status_txt
self._count = 0
def _disp(self, _, fetched):
self._progress(fetched / 1000, self._duration)
def disp(self, _, fetched):
self._progress(fetched, self._total)
def _progress(self, fillin, total, status=''):
def _progress(self, fillin, total):
if total == 0 or self._cancelled:
return
self._count += fillin
filled_len = int(round(self._bar_len * self._count / float(total)))
percents = round(100.0 * self._count / float(total), 1)
if percents > 100:
percents = 100.0
if filled_len > self._bar_len:
filled_len = self._bar_len
percents = 100
bar = '=' * filled_len + ' ' * (self._bar_len - filled_len)
sys.stdout.write(' [%s] %s%s ...%s\r' % (bar, percents, '%', status))
bar = self._symbol_done * filled_len + \
self._symbol_space * (self._bar_len - filled_len)
disp = f" [{bar}] {percents:>5.1f}% ...{self._status_txt} "[:self._console_width - 1] + '\r'
sys.stdout.write(disp)
sys.stdout.flush()
def close(self):
if not self._cancelled:
self._progress(self._duration, self._duration)
self._progress(self._total, self._total)
def cancel(self):
self._cancelled = True

View File

@@ -1,4 +1,4 @@
import logging
import logging # noqa
from . import mylogger
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',

7
pytchat/core/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
from .pytchat import PytchatCore
from .. util.extract_video_id import extract_video_id
def create(video_id: str, **kwargs):
    '''
    Build a PytchatCore for the given video.

    `video_id` is passed through extract_video_id() first; every keyword
    argument is forwarded unchanged to the PytchatCore constructor.
    '''
    return PytchatCore(extract_video_id(video_id), **kwargs)

204
pytchat/core/pytchat.py Normal file
View File

@@ -0,0 +1,204 @@
import httpx
import json
import signal
import time
import traceback
import urllib.parse
from ..parser.live import Parser
from .. import config
from .. import exceptions
from ..paramgen import liveparam, arcparam
from ..processors.default.processor import DefaultProcessor
from ..processors.combinator import Combinator
from ..util.extract_video_id import extract_video_id
headers = config.headers
MAX_RETRY = 10
class PytchatCore:
    '''
    Fetches chat data on demand: every call to get() performs the HTTP
    requests for one block of chat data and returns the processed result.

    Parameter
    ---------
    video_id : str

    seektime : int
        start position of fetching chat (seconds).
        This option is valid for archived chat only.
        If negative value, chat data posted before the start of the broadcast
        will be retrieved as well.

    processor : ChatProcessor
        Object that processes the fetched chat data.
        A tuple of processors is wrapped in a Combinator.
        If None (default), a new DefaultProcessor is created for this
        instance.

    interruptable : bool
        Allows keyboard interrupts.
        Set this parameter to False if your own threading program causes
        the problem.

    force_replay : bool
        force to fetch archived chat data, even if specified video is live.

    topchat_only : bool
        If True, get only top chat.

    hold_exception : bool [default:True]
        If True, when exceptions occur, the exception is held internally,
        and can be raised by raise_for_status().

    logger : logger used for debug / error output.

    Attributes
    ---------
    _is_alive : bool
        Flag to stop getting chat.
    '''

    _setup_finished = False

    def __init__(self, video_id,
                 seektime=-1,
                 processor=None,
                 interruptable=True,
                 force_replay=False,
                 topchat_only=False,
                 hold_exception=True,
                 logger=config.logger(__name__),
                 ):
        self._video_id = extract_video_id(video_id)
        self.seektime = seektime
        # A default built in the signature would be created once and shared
        # by every instance; terminate() finalizes the processor, so each
        # instance needs its own.
        if processor is None:
            processor = DefaultProcessor()
        if isinstance(processor, tuple):
            self.processor = Combinator(processor)
        else:
            self.processor = processor
        self._is_alive = True
        self._is_replay = force_replay
        self._hold_exception = hold_exception
        self._exception_holder = None
        self._parser = Parser(
            is_replay=self._is_replay,
            exception_holder=self._exception_holder
        )
        self._first_fetch = True
        self._fetch_url = "live_chat/get_live_chat?continuation="
        self._topchat_only = topchat_only
        self._logger = logger
        if interruptable:
            # Ctrl+C stops fetching instead of raising KeyboardInterrupt.
            signal.signal(signal.SIGINT, lambda a, b: self.terminate())
        self._setup()

    def _setup(self):
        '''Fetch the first continuation parameter.'''
        time.sleep(0.1)  # sleep shortly to prohibit skipping fetching data
        self.continuation = liveparam.getparam(self._video_id, 3)

    def _get_chat_component(self):
        '''
        Fetch one block of chat data and store the next continuation
        parameter in self.continuation.

        Return:
        -------
        dict with the keys "video_id", "timeout" (seconds) and "chatdata",
        or None when there is no continuation, the instance is no longer
        alive, or an exception occurred and was held.
        '''
        try:
            with httpx.Client(http2=True) as client:
                if self.continuation and self._is_alive:
                    contents = self._get_contents(self.continuation, client, headers)
                    metadata, chatdata = self._parser.parse(contents)
                    timeout = metadata['timeoutMs'] / 1000
                    chat_component = {
                        "video_id": self._video_id,
                        "timeout": timeout,
                        "chatdata": chatdata
                    }
                    self.continuation = metadata.get('continuation')
                    return chat_component
        except exceptions.ChatParseException as e:
            self._logger.debug(f"[{self._video_id}]{str(e)}")
            self._raise_exception(e)
        except Exception as e:
            self._logger.error(f"{traceback.format_exc(limit=-1)}")
            self._raise_exception(e)

    def _get_contents(self, continuation, client, headers):
        '''Get 'continuationContents' from livechat json.
        If contents is None at first fetching,
        try to fetch archive chat data.

        Return:
        -------
        'continuationContents' which includes metadata & chat data.
        '''
        livechat_json = (
            self._get_livechat_json(continuation, client, headers)
        )
        contents = self._parser.get_contents(livechat_json)
        if self._first_fetch:
            if contents is None or self._is_replay:
                # Try to fetch archive chat data.
                self._parser.is_replay = True
                self._fetch_url = "live_chat_replay/get_live_chat_replay?continuation="
                continuation = arcparam.getparam(
                    self._video_id, self.seektime, self._topchat_only)
                livechat_json = (self._get_livechat_json(continuation, client, headers))
                reload_continuation = self._parser.reload_continuation(
                    self._parser.get_contents(livechat_json))
                if reload_continuation:
                    livechat_json = (self._get_livechat_json(
                        reload_continuation, client, headers))
                contents = self._parser.get_contents(livechat_json)
                self._is_replay = True
            self._first_fetch = False
        return contents

    def _get_livechat_json(self, continuation, client, headers):
        '''
        Get json which includes chat data.

        The request is attempted up to MAX_RETRY + 1 times, waiting two
        seconds after each failed attempt. When every attempt fails, the
        error is logged and RetryExceedMaxCount is passed to
        _raise_exception().

        Return:
        -------
        The decoded json, or None when every attempt failed and the
        exception was held.
        '''
        continuation = urllib.parse.quote(continuation)
        livechat_json = None
        err = None
        url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
        for _ in range(MAX_RETRY + 1):
            # `client` is owned by the caller and reused for later requests
            # and retries, so it must not be entered/closed here.
            try:
                livechat_json = client.get(url, headers=headers).json()
                break
            except (json.JSONDecodeError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError) as e:
                err = e
                time.sleep(2)
                continue
        else:
            self._logger.error(f"[{self._video_id}]"
                               f"Exceeded retry count. Last error: {str(err)}")
            self._raise_exception(exceptions.RetryExceedMaxCount())
        return livechat_json

    def get(self):
        '''
        Fetch the next block of chat data and return it processed by
        self.processor. Returns an empty list when no longer alive.
        '''
        if self.is_alive():
            chat_component = self._get_chat_component()
            return self.processor.process([chat_component])
        else:
            return []

    def is_replay(self):
        return self._is_replay

    def is_alive(self):
        return self._is_alive

    def terminate(self):
        '''Stop fetching chat and finalize the processor.'''
        self._is_alive = False
        self.processor.finalize()

    def raise_for_status(self):
        '''Raise the exception held internally, if there is one.'''
        if self._exception_holder is not None:
            raise self._exception_holder

    def _raise_exception(self, exception: Exception = None):
        '''
        Terminate fetching, then either raise `exception` immediately
        (hold_exception is False) or keep it for raise_for_status().
        '''
        self.terminate()
        if self._hold_exception is False:
            raise exception
        self._exception_holder = exception

View File

@@ -4,13 +4,13 @@ import asyncio
class Buffer(asyncio.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
Buffer for storing chat data.
Parameter
---------
maxsize : int
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
Maximum number of chat blocks to be stored.
If it exceeds the maximum, the oldest chat block will be discarded.
'''
def __init__(self, maxsize=0):

View File

@@ -22,54 +22,51 @@ MAX_RETRY = 10
class LiveChatAsync:
'''asyncioを利用してYouTubeのライブ配信のチャットデータを取得する。
'''LiveChatAsync object fetches chat data and stores them
in a buffer with asyncio.
Parameter
---------
video_id : str
動画ID
seektime : int
(ライブチャット取得時は無視)
取得開始するアーカイブ済みチャットの経過時間(秒)
マイナス値を指定した場合は、配信開始前のチャットも取得する。
start position of fetching chat (seconds).
This option is valid for archived chat only.
If negative value, chat data posted before the start of the broadcast
will be retrieved as well.
processor : ChatProcessor
チャットデータを加工するオブジェクト
buffer : Buffer(maxsize:20[default])
チャットデータchat_componentを格納するバッファ。
maxsize : 格納できるchat_componentの個数
default値20個。1個で約5~10秒分。
buffer : Buffer
buffer of chat data fetched background.
interruptable : bool
Ctrl+Cによる処理中断を行うかどうか。
Allows keyboard interrupts.
Set this parameter to False if your own threading program causes
the problem.
callback : func
_listen()関数から一定間隔で自動的に呼びだす関数。
function called periodically from _listen().
done_callback : func
listener終了時に呼び出すコールバック。
function called when listener ends.
exception_handler : func
例外を処理する関数
direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる
If True, invoke specified callback function without using buffer.
callback is required. If not, IllegalFunctionCall will be raised.
force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。
force to fetch archived chat data, even if specified video is live.
topchat_only : bool
Trueの場合、上位チャットのみ取得する。
If True, get only top chat.
Attributes
---------
_is_alive : bool
チャット取得を停止するためのフラグ
Flag to stop getting chat.
'''
_setup_finished = False
@@ -114,31 +111,30 @@ class LiveChatAsync:
self._set_exception_handler(exception_handler)
if interruptable:
signal.signal(signal.SIGINT,
(lambda a, b: asyncio.create_task(
LiveChatAsync.shutdown(None, signal.SIGINT, b))))
(lambda a, b: self._keyboard_interrupt()))
self._setup()
def _setup(self):
# direct modeがTrueでcallback未設定の場合例外発生。
# An exception is raised when direct mode is true and no callback is set.
if self._direct_mode:
if self._callback is None:
raise exceptions.IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.")
else:
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
# Create a default buffer if `direct_mode` is False and buffer is not set.
if self._buffer is None:
self._buffer = Buffer(maxsize=20)
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成
# Create a loop task to call callback if the `callback` param is specified.
if self._callback is None:
pass
else:
# callbackを呼ぶループタスクの開始
# Create a loop task to call callback if the `callback` param is specified.
loop = asyncio.get_event_loop()
loop.create_task(self._callback_loop(self._callback))
# _listenループタスクの開始
# Start a loop task for _listen()
loop = asyncio.get_event_loop()
self.listen_task = loop.create_task(self._startlisten())
# add_done_callbackの登録
# Register add_done_callback
if self._done_callback is None:
self.listen_task.add_done_callback(self._finish)
else:
@@ -190,12 +186,12 @@ class LiveChatAsync:
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
raise
except (TypeError, json.JSONDecodeError):
except Exception:
self._logger.error(f"{traceback.format_exc(limit = -1)}")
raise
self._logger.debug(f"[{self._video_id}]finished fetching chat.")
raise exceptions.ChatDataFinished
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
async def _check_pause(self, continuation):
if self._pauser.empty():
@@ -246,30 +242,30 @@ class LiveChatAsync:
'''
continuation = urllib.parse.quote(continuation)
livechat_json = None
status_code = 0
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1):
try:
resp = await client.get(url, headers=headers)
livechat_json = resp.json()
break
except (httpx.HTTPError, json.JSONDecodeError):
except (json.JSONDecodeError, httpx.HTTPError):
await asyncio.sleep(1)
continue
else:
self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count. status_code={status_code}")
f"Exceeded retry count.")
return None
return livechat_json
async def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。
""" If a callback is specified in the constructor,
it throws chat data at regular intervals to the
function specified in the callback in the background.
Parameter
---------
callback : func
加工済みのチャットデータを渡す先の関数。
function to which the processed chat data is passed.
"""
while self.is_alive():
items = await self._buffer.get()
@@ -280,11 +276,13 @@ class LiveChatAsync:
await self._callback(processed_chat)
async def get(self):
""" bufferからデータを取り出し、processorに投げ、
加工済みのチャットデータを返す。
"""
Retrieves data from the buffer,
throws it to the processor,
and returns the processed chat data.
Returns
: Processorによって加工されたチャットデータ
: Chat data processed by the Processor
"""
if self._callback is None:
if self.is_alive():
@@ -293,7 +291,7 @@ class LiveChatAsync:
else:
return []
raise exceptions.IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。")
"Callback parameter is already set, so get() cannot be performed.")
def is_replay(self):
return self._is_replay
@@ -314,11 +312,11 @@ class LiveChatAsync:
return self._is_alive
def _finish(self, sender):
'''Listener終了時のコールバック'''
'''Called when the _listen() task finished.'''
try:
self._task_finished()
except CancelledError:
self._logger.debug(f'[{self._video_id}]cancelled:{sender}')
self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self):
if self._pauser.empty():
@@ -326,10 +324,14 @@ class LiveChatAsync:
self._is_alive = False
self._buffer.put_nowait({})
self.processor.finalize()
def _keyboard_interrupt(self):
self.exception = exceptions.ChatDataFinished()
self.terminate()
def _task_finished(self):
'''
Listenerを終了する。
Terminate fetching chats.
'''
if self.is_alive():
self.terminate()
@@ -339,7 +341,7 @@ class LiveChatAsync:
self.exception = e
if not isinstance(e, exceptions.ChatParseException):
self._logger.error(f'Internal exception - {type(e)}{str(e)}')
self._logger.info(f'[{self._video_id}]終了しました')
self._logger.info(f'[{self._video_id}] finished.')
def raise_for_status(self):
if self.exception is not None:
@@ -349,15 +351,3 @@ class LiveChatAsync:
def _set_exception_handler(cls, handler):
loop = asyncio.get_event_loop()
loop.set_exception_handler(handler)
@classmethod
async def shutdown(cls, event, sig=None, handler=None):
cls._logger.debug("shutdown...")
tasks = [t for t in asyncio.all_tasks() if t is not
asyncio.current_task()]
[task.cancel() for task in tasks]
cls._logger.debug("complete remaining tasks...")
await asyncio.gather(*tasks, return_exceptions=True)
loop = asyncio.get_event_loop()
loop.stop()

View File

@@ -4,13 +4,13 @@ import queue
class Buffer(queue.Queue):
'''
チャットデータを格納するバッファの役割を持つFIFOキュー
Buffer for storing chat data.
Parameter
---------
max_size : int
格納するチャットブロックの最大個数。0の場合は無限。
最大値を超える場合は古いチャットブロックから破棄される。
maxsize : int
Maximum number of chat blocks to be stored.
If it exceeds the maximum, the oldest chat block will be discarded.
'''
def __init__(self, maxsize=0):

View File

@@ -21,54 +21,53 @@ MAX_RETRY = 10
class LiveChat:
''' スレッドプールを利用してYouTubeのライブ配信のチャットデータを取得する
'''
LiveChat object fetches chat data and stores them
in a buffer with ThreadpoolExecutor.
Parameter
---------
video_id : str
動画ID
seektime : int
(ライブチャット取得時は無視)
取得開始するアーカイブ済みチャットの経過時間(秒)
マイナス値を指定した場合は、配信開始前のチャットも取得する。
start position of fetching chat (seconds).
This option is valid for archived chat only.
If negative value, chat data posted before the start of the broadcast
will be retrieved as well.
processor : ChatProcessor
チャットデータを加工するオブジェクト
buffer : Buffer(maxsize:20[default])
チャットデータchat_componentを格納するバッファ。
maxsize : 格納できるchat_componentの個数
default値20個。1個で約5~10秒分。
buffer : Buffer
buffer of chat data fetched background.
interruptable : bool
Ctrl+Cによる処理中断を行うかどうか。
Allows keyboard interrupts.
Set this parameter to False if your own threading program causes
the problem.
callback : func
_listen()関数から一定間隔で自動的に呼びだす関数。
function called periodically from _listen().
done_callback : func
listener終了時に呼び出すコールバック。
function called when listener ends.
direct_mode : bool
Trueの場合、bufferを使わずにcallbackを呼ぶ。
Trueの場合、callbackの設定が必須
(設定していない場合IllegalFunctionCall例外を発生させる
If True, invoke specified callback function without using buffer.
callback is required. If not, IllegalFunctionCall will be raised.
force_replay : bool
Trueの場合、ライブチャットが取得できる場合であっても
強制的にアーカイブ済みチャットを取得する。
force to fetch archived chat data, even if specified video is live.
topchat_only : bool
Trueの場合、上位チャットのみ取得する。
If True, get only top chat.
Attributes
---------
_executor : ThreadPoolExecutor
チャットデータ取得ループ_listen用のスレッド
This is used for _listen() loop.
_is_alive : bool
チャット取得を停止するためのフラグ
Flag to stop getting chat.
'''
_setup_finished = False
@@ -112,24 +111,24 @@ class LiveChat:
self._setup()
def _setup(self):
# direct modeがTrueでcallback未設定の場合例外発生。
# An exception is raised when direct mode is true and no callback is set.
if self._direct_mode:
if self._callback is None:
raise exceptions.IllegalFunctionCall(
"When direct_mode=True, callback parameter is required.")
else:
# direct modeがFalseでbufferが未設定ならばデフォルトのbufferを作成
# Create a default buffer if `direct_mode` is False and buffer is not set.
if self._buffer is None:
self._buffer = Buffer(maxsize=20)
# callbackが指定されている場合はcallbackを呼ぶループタスクを作成
# Create a loop task to call callback if the `callback` param is specified.
if self._callback is None:
pass
else:
# callbackを呼ぶループタスクの開始
# Start a loop task calling callback function.
self._executor.submit(self._callback_loop, self._callback)
# _listenループタスクの開始
# Start a loop task for _listen()
self.listen_task = self._executor.submit(self._startlisten)
# add_done_callbackの登録
# Register add_done_callback
if self._done_callback is None:
self.listen_task.add_done_callback(self._finish)
else:
@@ -180,12 +179,12 @@ class LiveChat:
except exceptions.ChatParseException as e:
self._logger.debug(f"[{self._video_id}]{str(e)}")
raise
except (TypeError, json.JSONDecodeError):
except Exception:
self._logger.error(f"{traceback.format_exc(limit=-1)}")
raise
self._logger.debug(f"[{self._video_id}]finished fetching chat.")
raise exceptions.ChatDataFinished
self._logger.debug(f"[{self._video_id}] finished fetching chat.")
def _check_pause(self, continuation):
if self._pauser.empty():
@@ -236,30 +235,30 @@ class LiveChat:
'''
continuation = urllib.parse.quote(continuation)
livechat_json = None
status_code = 0
url = f"https://www.youtube.com/{self._fetch_url}{continuation}&pbj=1"
for _ in range(MAX_RETRY + 1):
with client:
try:
livechat_json = client.get(url, headers=headers).json()
break
except json.JSONDecodeError:
time.sleep(1)
except (json.JSONDecodeError, httpx.HTTPError):
time.sleep(2)
continue
else:
self._logger.error(f"[{self._video_id}]"
f"Exceeded retry count. status_code={status_code}")
f"Exceeded retry count.")
raise exceptions.RetryExceedMaxCount()
return livechat_json
def _callback_loop(self, callback):
""" コンストラクタでcallbackを指定している場合、バックグラウンドで
callbackに指定された関数に一定間隔でチャットデータを投げる。
""" If a callback is specified in the constructor,
it throws chat data at regular intervals to the
function specified in the callback in the background.
Parameter
---------
callback : func
加工済みのチャットデータを渡す先の関数。
function to which the processed chat data is passed.
"""
while self.is_alive():
items = self._buffer.get()
@@ -270,11 +269,13 @@ class LiveChat:
self._callback(processed_chat)
def get(self):
""" bufferからデータを取り出し、processorに投げ、
加工済みのチャットデータを返す。
"""
Retrieves data from the buffer,
throws it to the processor,
and returns the processed chat data.
Returns
: Processorによって加工されたチャットデータ
: Chat data processed by the Processor
"""
if self._callback is None:
if self.is_alive():
@@ -283,7 +284,7 @@ class LiveChat:
else:
return []
raise exceptions.IllegalFunctionCall(
"既にcallbackを登録済みのため、get()は実行できません。")
"Callback parameter is already set, so get() cannot be performed.")
def is_replay(self):
return self._is_replay
@@ -304,13 +305,16 @@ class LiveChat:
return self._is_alive
def _finish(self, sender):
'''Listener終了時のコールバック'''
'''Called when the _listen() task finished.'''
try:
self._task_finished()
except CancelledError:
self._logger.debug(f'[{self._video_id}]cancelled:{sender}')
self._logger.debug(f'[{self._video_id}] cancelled:{sender}')
def terminate(self):
'''
Terminate fetching chats.
'''
if self._pauser.empty():
self._pauser.put_nowait(None)
self._is_alive = False
@@ -319,9 +323,6 @@ class LiveChat:
self.processor.finalize()
def _task_finished(self):
'''
Listenerを終了する。
'''
if self.is_alive():
self.terminate()
try:
@@ -330,7 +331,7 @@ class LiveChat:
self.exception = e
if not isinstance(e, exceptions.ChatParseException):
self._logger.error(f'Internal exception - {type(e)}{str(e)}')
self._logger.info(f'[{self._video_id}]終了しました')
self._logger.info(f'[{self._video_id}] finished.')
def raise_for_status(self):
if self.exception is not None:

View File

@@ -38,7 +38,9 @@ class InvalidVideoIdException(Exception):
'''
Thrown when the video_id does not exist (VideoInfo).
'''
pass
def __init__(self, doc):
self.msg = "InvalidVideoIdException"
self.doc = doc
class UnknownConnectionError(Exception):
@@ -47,7 +49,7 @@ class UnknownConnectionError(Exception):
class RetryExceedMaxCount(Exception):
'''
thrown when the number of retries exceeds the maximum value.
Thrown when the number of retries exceeds the maximum value.
'''
pass
@@ -66,14 +68,14 @@ class FailedExtractContinuation(ChatDataFinished):
class VideoInfoParseError(Exception):
'''
thrown when failed to parse video info
Base exception when parsing video info.
'''
class PatternUnmatchError(VideoInfoParseError):
'''
thrown when failed to parse video info with unmatched pattern
Thrown when failed to parse video info with unmatched pattern.
'''
def __init__(self, doc):
def __init__(self, doc=''):
self.msg = "PatternUnmatchError"
self.doc = doc

View File

@@ -1,133 +0,0 @@
from base64 import urlsafe_b64encode as b64enc
from functools import reduce
import urllib.parse
'''
Generate continuation parameter of youtube replay chat.
Author: taizan-hokuto (2019) @taizan205
ver 0.0.1 2019.10.05
'''
def _gen_vid_long(video_id):
    """Build the long form of the video_id parameter.

    The payload is a fixed header, the encoded length of the id, the id,
    a fixed separator, the id once more and a fixed terminator. The
    payload is URL-safe base64 encoded and then percent-quoted.

    Parameter
    ---------
    video_id : str

    Return
    ---------
    bytes : quoted, base64 encoded video_id parameter.
    """
    vid_bytes = video_id.encode()
    payload = b''.join((
        b'\x0A\x0F\x1A\x0D\x0A',
        _nval(len(vid_bytes)),
        vid_bytes,
        b'\x1A\x13\xEA\xA8\xDD\xB9\x01\x0D\x0A\x0B',
        vid_bytes,
        b'\x20\x01',
    ))
    encoded = b64enc(payload).decode()
    return urllib.parse.quote(encoded).encode()
def _gen_vid(video_id):
    """Build the short form of the video_id parameter.

    The payload is a fixed header, the encoded length of the id, the id
    and a fixed terminator. The payload is URL-safe base64 encoded and
    then percent-quoted.

    Parameter
    ---------
    video_id : str

    Return
    ---------
    bytes : quoted, base64 encoded video_id parameter.
    """
    vid_bytes = video_id.encode()
    payload = b''.join((
        b'\x0A\x0F\x1A\x0D\x0A',
        _nval(len(vid_bytes)),
        vid_bytes,
        b'\x20\x01',
    ))
    encoded = b64enc(payload).decode()
    return urllib.parse.quote(encoded).encode()
def _nval(val):
"""convert value to byte array"""
if val < 0:
raise ValueError
buf = b''
while val >> 7:
m = val & 0xFF | 0x80
buf += m.to_bytes(1, 'big')
val >>= 7
buf += val.to_bytes(1, 'big')
return buf
def _build(video_id, seektime, topchat_only):
    """Assemble the continuation parameter for replay chat.

    Parameter
    ---------
    video_id : str
        passed to _gen_vid() to produce the embedded video id field.
    seektime : int or float
        start position; multiplied by 1000 and truncated to an integer
        before being encoded. Zero omits the time field entirely.
    topchat_only : bool
        selects the switch byte (b'\\x04' when true, b'\\x01' otherwise).

    Return
    ---------
    str : percent-quoted, URL-safe base64 encoded parameter.

    Raises
    ------
    ValueError : if seektime is negative.
    """
    chat_switch = b'\x04' if topchat_only else b'\x01'
    if seektime < 0:
        raise ValueError("seektime must be greater than or equal to zero.")

    if seektime == 0:
        # No offset: both the time field and its header are left out.
        offset = b''
        offset_header = b''
    else:
        offset = _nval(int(seektime * 1000))
        offset_header = (
            b'\x5A' + (len(offset) + 1).to_bytes(1, 'big') + b'\x10'
        )

    vid = _gen_vid(video_id)
    body = b''.join((
        b'\x1A',
        _nval(len(vid)),
        vid,
        b'\x40\x01',
        offset_header,
        offset,
        b'\x60\x04\x72\x02\x08',
        chat_switch,
        b'\x78\x01',
    ))
    framed = b'\xA2\x9D\xB0\xD3\x04' + _nval(len(body)) + body
    return urllib.parse.quote(b64enc(framed).decode())
def getparam(video_id, seektime=0.0, topchat_only=False):
    '''
    Generate the continuation parameter of replay chat by delegating
    to _build().

    Parameter
    ---------
    video_id : str
        id of the video whose replay chat is requested.
    seektime : int or float
        unit:seconds
        start position of fetching chat data.
    topchat_only : bool
        if True, fetch only 'top chat'

    Return
    ---------
    the value returned by _build() for the given arguments.
    '''
    return _build(
        video_id=video_id,
        seektime=seektime,
        topchat_only=topchat_only,
    )

View File

@@ -8,15 +8,26 @@ from .. import exceptions
class Parser:
'''
Parser of chat json.
Parameter
----------
is_replay : bool
__slots__ = ['is_replay']
exception_holder : Object [default:None]
The object holding exceptions.
This is passed from the parent livechat object.
'''
__slots__ = ['is_replay', 'exception_holder']
def __init__(self, is_replay):
def __init__(self, is_replay, exception_holder=None):
self.is_replay = is_replay
self.exception_holder = exception_holder
def get_contents(self, jsn):
if jsn is None:
raise exceptions.IllegalFunctionCall('Called with none JSON object.')
self.raise_exception(exceptions.IllegalFunctionCall('Called with none JSON object.'))
if jsn['response']['responseContext'].get('errors'):
raise exceptions.ResponseContextError(
'The video_id would be wrong, or video is deleted or private.')
@@ -42,11 +53,11 @@ class Parser:
if contents is None:
'''Broadcasting end or cannot fetch chat stream'''
raise exceptions.NoContents('Chat data stream is empty.')
self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
cont = contents['liveChatContinuation']['continuations'][0]
if cont is None:
raise exceptions.NoContinuation('No Continuation')
self.raise_exception(exceptions.NoContinuation('No Continuation'))
metadata = (cont.get('invalidationContinuationData')
or cont.get('timedContinuationData')
or cont.get('reloadContinuationData')
@@ -54,13 +65,13 @@ class Parser:
)
if metadata is None:
if cont.get("playerSeekContinuationData"):
raise exceptions.ChatDataFinished('Finished chat data')
self.raise_exception(exceptions.ChatDataFinished('Finished chat data'))
unknown = list(cont.keys())[0]
if unknown:
raise exceptions.ReceivedUnknownContinuation(
f"Received unknown continuation type:{unknown}")
self.raise_exception(exceptions.ReceivedUnknownContinuation(
f"Received unknown continuation type:{unknown}"))
else:
raise exceptions.FailedExtractContinuation('Cannot extract continuation data')
self.raise_exception(exceptions.FailedExtractContinuation('Cannot extract continuation data'))
return self._create_data(metadata, contents)
def reload_continuation(self, contents):
@@ -72,7 +83,7 @@ class Parser:
"""
if contents is None:
'''Broadcasting end or cannot fetch chat stream'''
raise exceptions.NoContents('Chat data stream is empty.')
self.raise_exception(exceptions.NoContents('Chat data stream is empty.'))
cont = contents['liveChatContinuation']['continuations'][0]
if cont.get("liveChatReplayContinuationData"):
# chat data exist.
@@ -81,7 +92,7 @@ class Parser:
init_cont = cont.get("playerSeekContinuationData")
if init_cont:
return init_cont.get("continuation")
raise exceptions.ChatDataFinished('Finished chat data')
self.raise_exception(exceptions.ChatDataFinished('Finished chat data'))
def _create_data(self, metadata, contents):
actions = contents['liveChatContinuation'].get('actions')
@@ -103,3 +114,8 @@ class Parser:
start = int(actions[0]["replayChatItemAction"]["videoOffsetTimeMsec"])
last = int(actions[-1]["replayChatItemAction"]["videoOffsetTimeMsec"])
return (last - start)
def raise_exception(self, exception):
    """Record the exception on the holder, if there is one, then raise it.

    Parameter
    ---------
    exception : Exception
        the error to report.

    Raises
    ------
    Always raises the given exception. Callers use this method in place
    of a plain ``raise`` and go on to dereference the data they just
    found missing, so control must never return to them.
    """
    holder = self.exception_holder
    if holder is not None:
        # Store the error ON the holder rather than rebinding
        # self.exception_holder, which would discard the holder and turn
        # this parser's reference into an exception instance.
        # NOTE(review): assumes the holder exposes a writable `exception`
        # attribute, as the LiveChat object does - confirm for other holders.
        holder.exception = exception
    raise exception

View File

@@ -36,3 +36,7 @@ class Combinator(ChatProcessor):
'''
return tuple(processor.process(chat_components)
for processor in self.processors)
def finalize(self, *args, **kwargs):
    """Finalize every combined processor in order.

    All positional and keyword arguments are forwarded unchanged to each
    processor's finalize(). Returns None.
    """
    # A plain loop: the calls are made for their side effects only, so no
    # throwaway list of results is built.
    for processor in self.processors:
        processor.finalize(*args, **kwargs)

View File

@@ -0,0 +1,11 @@
import json
from .renderer.base import Author
from .renderer.paidmessage import Colors
from .renderer.paidsticker import Colors2
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes Author, Colors and Colors2 objects
    as the dict of their instance attributes."""

    def default(self, obj):
        # Anything that is not one of the known attribute containers is
        # handed to the base encoder.
        if not isinstance(obj, (Author, Colors, Colors2)):
            return json.JSONEncoder.default(self, obj)
        return vars(obj)

View File

@@ -1,5 +1,7 @@
import asyncio
import json
import time
from .custom_encoder import CustomEncoder
from .renderer.textmessage import LiveChatTextMessageRenderer
from .renderer.paidmessage import LiveChatPaidMessageRenderer
from .renderer.paidsticker import LiveChatPaidStickerRenderer
@@ -11,25 +13,120 @@ from ... import config
logger = config.logger(__name__)
class Chat:
def json(self) -> str:
return json.dumps(vars(self), ensure_ascii=False, cls=CustomEncoder)
class Chatdata:
def __init__(self, chatlist: list, timeout: float):
def __init__(self, chatlist: list, timeout: float, abs_diff):
self.items = chatlist
self.interval = timeout
self.abs_diff = abs_diff
self.itemcount = 0
def tick(self):
if self.interval == 0:
'''DEPRECATE
Use sync_items()
'''
if len(self.items) < 1:
time.sleep(1)
return
time.sleep(self.interval / len(self.items))
if self.itemcount == 0:
self.starttime = time.time()
if len(self.items) == 1:
total_itemcount = 1
else:
total_itemcount = len(self.items) - 1
next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000
tobe_disptime = self.abs_diff + next_chattime
wait_sec = tobe_disptime - time.time()
self.itemcount += 1
if wait_sec < 0:
wait_sec = 0
time.sleep(wait_sec)
async def tick_async(self):
if self.interval == 0:
'''DEPRECATE
Use async_items()
'''
if len(self.items) < 1:
await asyncio.sleep(1)
return
await asyncio.sleep(self.interval / len(self.items))
if self.itemcount == 0:
self.starttime = time.time()
if len(self.items) == 1:
total_itemcount = 1
else:
total_itemcount = len(self.items) - 1
next_chattime = (self.items[0].timestamp + (self.items[-1].timestamp - self.items[0].timestamp) / total_itemcount * self.itemcount) / 1000
tobe_disptime = self.abs_diff + next_chattime
wait_sec = tobe_disptime - time.time()
self.itemcount += 1
if wait_sec < 0:
wait_sec = 0
await asyncio.sleep(wait_sec)
def sync_items(self):
# Generator that yields the entries of self.items, calling time.sleep()
# with a computed wait before each yield.
# NOTE(review): indentation is lost in this view, so the exact nesting of
# the statements below cannot be confirmed from here.
starttime = time.time()
if len(self.items) > 0:
# Timestamps are divided by 1000 so they can be combined with abs_diff and
# time.time(), which are in seconds.
last_chattime = self.items[-1].timestamp / 1000
tobe_disptime = self.abs_diff + last_chattime
# Time left until the last item is due; never negative.
wait_total_sec = max(tobe_disptime - time.time(), 0)
if len(self.items) > 1:
# Divide the remaining time by the number of items.
wait_sec = wait_total_sec / len(self.items)
elif len(self.items) == 1:
wait_sec = 0
for c in self.items:
if wait_sec < 0:
wait_sec = 0
time.sleep(wait_sec)
yield c
# Sleep for whatever is left of one second since this call started.
stop_interval = time.time() - starttime
if stop_interval < 1:
time.sleep(1 - stop_interval)
async def async_items(self):
# Async generator that yields the entries of self.items, awaiting
# asyncio.sleep() with a computed wait before each yield.
# NOTE(review): indentation is lost in this view, so the exact nesting of
# the statements below cannot be confirmed from here.
starttime = time.time()
if len(self.items) > 0:
# Timestamps are divided by 1000 so they can be combined with abs_diff and
# time.time(), which are in seconds.
last_chattime = self.items[-1].timestamp / 1000
tobe_disptime = self.abs_diff + last_chattime
# Time left until the last item is due; never negative.
wait_total_sec = max(tobe_disptime - time.time(), 0)
if len(self.items) > 1:
# Divide the remaining time by the number of items.
wait_sec = wait_total_sec / len(self.items)
elif len(self.items) == 1:
wait_sec = 0
for c in self.items:
if wait_sec < 0:
wait_sec = 0
await asyncio.sleep(wait_sec)
yield c
# Sleep for whatever is left of one second since this call started.
stop_interval = time.time() - starttime
if stop_interval < 1:
await asyncio.sleep(1 - stop_interval)
def json(self) -> str:
return ''.join(("[", ','.join((a.json() for a in self.items)), "]"))
class DefaultProcessor(ChatProcessor):
def __init__(self):
self.first = True
self.abs_diff = 0
self.renderers = {
"liveChatTextMessageRenderer": LiveChatTextMessageRenderer(),
"liveChatPaidMessageRenderer": LiveChatPaidMessageRenderer(),
"liveChatPaidStickerRenderer": LiveChatPaidStickerRenderer(),
"liveChatLegacyPaidMessageRenderer": LiveChatLegacyPaidMessageRenderer(),
"liveChatMembershipItemRenderer": LiveChatMembershipItemRenderer()
}
def process(self, chat_components: list):
chatlist = []
@@ -37,8 +134,10 @@ class DefaultProcessor(ChatProcessor):
if chat_components:
for component in chat_components:
if component is None:
continue
timeout += component.get('timeout', 0)
chatdata = component.get('chatdata')
chatdata = component.get('chatdata') # if from Extractor, chatdata is generator.
if chatdata is None:
continue
for action in chatdata:
@@ -46,43 +145,35 @@ class DefaultProcessor(ChatProcessor):
continue
if action.get('addChatItemAction') is None:
continue
if action['addChatItemAction'].get('item') is None:
item = action['addChatItemAction'].get('item')
if item is None:
continue
chat = self._parse(action)
chat = self._parse(item)
if chat:
chatlist.append(chat)
return Chatdata(chatlist, float(timeout))
if self.first and chatlist:
self.abs_diff = time.time() - chatlist[0].timestamp / 1000
self.first = False
def _parse(self, sitem):
action = sitem.get("addChatItemAction")
if action:
item = action.get("item")
if item is None:
return None
chatdata = Chatdata(chatlist, float(timeout), self.abs_diff)
return chatdata
def _parse(self, item):
try:
renderer = self._get_renderer(item)
key = list(item.keys())[0]
renderer = self.renderers.get(key)
if renderer is None:
return None
renderer.setitem(item.get(key), Chat())
renderer.settype()
renderer.get_snippet()
renderer.get_authordetails()
rendered_chatobj = renderer.get_chatobj()
renderer.clear()
except (KeyError, TypeError) as e:
logger.error(f"{str(type(e))}-{str(e)} sitem:{str(sitem)}")
logger.error(f"{str(type(e))}-{str(e)} item:{str(item)}")
return None
return renderer
def _get_renderer(self, item):
if item.get("liveChatTextMessageRenderer"):
renderer = LiveChatTextMessageRenderer(item)
elif item.get("liveChatPaidMessageRenderer"):
renderer = LiveChatPaidMessageRenderer(item)
elif item.get("liveChatPaidStickerRenderer"):
renderer = LiveChatPaidStickerRenderer(item)
elif item.get("liveChatLegacyPaidMessageRenderer"):
renderer = LiveChatLegacyPaidMessageRenderer(item)
elif item.get("liveChatMembershipItemRenderer"):
renderer = LiveChatMembershipItemRenderer(item)
else:
renderer = None
return renderer
return rendered_chatobj

View File

@@ -6,89 +6,96 @@ class Author:
class BaseRenderer:
def __init__(self, item, chattype):
self.renderer = list(item.values())[0]
self.chattype = chattype
self.author = Author()
def setitem(self, item, chat):
self.item = item
self.chat = chat
self.chat.author = Author()
def settype(self):
pass
def get_snippet(self):
self.type = self.chattype
self.id = self.renderer.get('id')
timestampUsec = int(self.renderer.get("timestampUsec", 0))
self.timestamp = int(timestampUsec / 1000)
tst = self.renderer.get("timestampText")
self.chat.id = self.item.get('id')
timestampUsec = int(self.item.get("timestampUsec", 0))
self.chat.timestamp = int(timestampUsec / 1000)
tst = self.item.get("timestampText")
if tst:
self.elapsedTime = tst.get("simpleText")
self.chat.elapsedTime = tst.get("simpleText")
else:
self.elapsedTime = ""
self.datetime = self.get_datetime(timestampUsec)
self.message, self.messageEx = self.get_message(self.renderer)
self.id = self.renderer.get('id')
self.amountValue = 0.0
self.amountString = ""
self.currency = ""
self.bgColor = 0
self.chat.elapsedTime = ""
self.chat.datetime = self.get_datetime(timestampUsec)
self.chat.message, self.chat.messageEx = self.get_message(self.item)
self.chat.id = self.item.get('id')
self.chat.amountValue = 0.0
self.chat.amountString = ""
self.chat.currency = ""
self.chat.bgColor = 0
def get_authordetails(self):
self.author.badgeUrl = ""
(self.author.isVerified,
self.author.isChatOwner,
self.author.isChatSponsor,
self.author.isChatModerator) = (
self.get_badges(self.renderer)
self.chat.author.badgeUrl = ""
(self.chat.author.isVerified,
self.chat.author.isChatOwner,
self.chat.author.isChatSponsor,
self.chat.author.isChatModerator) = (
self.get_badges(self.item)
)
self.author.channelId = self.renderer.get("authorExternalChannelId")
self.author.channelUrl = "http://www.youtube.com/channel/" + self.author.channelId
self.author.name = self.renderer["authorName"]["simpleText"]
self.author.imageUrl = self.renderer["authorPhoto"]["thumbnails"][1]["url"]
self.chat.author.channelId = self.item.get("authorExternalChannelId")
self.chat.author.channelUrl = "http://www.youtube.com/channel/" + self.chat.author.channelId
self.chat.author.name = self.item["authorName"]["simpleText"]
self.chat.author.imageUrl = self.item["authorPhoto"]["thumbnails"][1]["url"]
def get_message(self, renderer):
def get_message(self, item):
message = ''
message_ex = []
if renderer.get("message"):
runs = renderer["message"].get("runs")
if runs:
for r in runs:
if r:
if r.get('emoji'):
message += r['emoji'].get('shortcuts', [''])[0]
message_ex.append({
'id': r['emoji'].get('emojiId').split('/')[-1],
'txt': r['emoji'].get('shortcuts', [''])[0],
'url': r['emoji']['image']['thumbnails'][0].get('url')
})
else:
message += r.get('text', '')
message_ex.append(r.get('text', ''))
runs = item.get("message", {}).get("runs", {})
for r in runs:
if not hasattr(r, "get"):
continue
if r.get('emoji'):
message += r['emoji'].get('shortcuts', [''])[0]
message_ex.append({
'id': r['emoji'].get('emojiId').split('/')[-1],
'txt': r['emoji'].get('shortcuts', [''])[0],
'url': r['emoji']['image']['thumbnails'][0].get('url')
})
else:
message += r.get('text', '')
message_ex.append(r.get('text', ''))
return message, message_ex
def get_badges(self, renderer):
self.author.type = ''
self.chat.author.type = ''
isVerified = False
isChatOwner = False
isChatSponsor = False
isChatModerator = False
badges = renderer.get("authorBadges")
if badges:
for badge in badges:
if badge["liveChatAuthorBadgeRenderer"].get("icon"):
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
self.author.type = author_type
if author_type == 'VERIFIED':
isVerified = True
if author_type == 'OWNER':
isChatOwner = True
if author_type == 'MODERATOR':
isChatModerator = True
if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"):
isChatSponsor = True
self.author.type = 'MEMBER'
self.get_badgeurl(badge)
badges = renderer.get("authorBadges", {})
for badge in badges:
if badge["liveChatAuthorBadgeRenderer"].get("icon"):
author_type = badge["liveChatAuthorBadgeRenderer"]["icon"]["iconType"]
self.chat.author.type = author_type
if author_type == 'VERIFIED':
isVerified = True
if author_type == 'OWNER':
isChatOwner = True
if author_type == 'MODERATOR':
isChatModerator = True
if badge["liveChatAuthorBadgeRenderer"].get("customThumbnail"):
isChatSponsor = True
self.chat.author.type = 'MEMBER'
self.get_badgeurl(badge)
return isVerified, isChatOwner, isChatSponsor, isChatModerator
def get_badgeurl(self, badge):
self.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"]
self.chat.author.badgeUrl = badge["liveChatAuthorBadgeRenderer"]["customThumbnail"]["thumbnails"][0]["url"]
def get_datetime(self, timestamp):
    """Format a timestamp given in microseconds since the epoch as
    'YYYY-MM-DD HH:MM:SS', using datetime.fromtimestamp() without a
    timezone argument."""
    seconds = timestamp / 1000000
    return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')
def get_chatobj(self):
return self.chat
def clear(self):
self.item = None
self.chat = None

View File

@@ -2,14 +2,14 @@ from .base import BaseRenderer
class LiveChatLegacyPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "newSponsor")
def settype(self):
self.chat.type = "newSponsor"
def get_authordetails(self):
super().get_authordetails()
self.author.isChatSponsor = True
self.chat.author.isChatSponsor = True
def get_message(self, renderer):
message = (renderer["eventText"]["runs"][0]["text"]
) + ' / ' + (renderer["detailText"]["simpleText"])
def get_message(self, item):
message = (item["eventText"]["runs"][0]["text"]
) + ' / ' + (item["detailText"]["simpleText"])
return message, [message]

View File

@@ -2,14 +2,17 @@ from .base import BaseRenderer
class LiveChatMembershipItemRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "newSponsor")
def settype(self):
self.chat.type = "newSponsor"
def get_authordetails(self):
super().get_authordetails()
self.author.isChatSponsor = True
self.chat.author.isChatSponsor = True
def get_message(self, renderer):
message = ''.join([mes.get("text", "")
for mes in renderer["headerSubtext"]["runs"]])
def get_message(self, item):
try:
message = ''.join([mes.get("text", "")
for mes in item["headerSubtext"]["runs"]])
except KeyError:
return "Welcome New Member!", ["Welcome New Member!"]
return message, [message]

View File

@@ -9,23 +9,23 @@ class Colors:
class LiveChatPaidMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superChat")
def settype(self):
self.chat.type = "superChat"
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer)
self.get_amountdata(self.item)
)
self.amountValue = amount
self.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
self.chat.amountValue = amount
self.chat.amountString = amountDisplayString
self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("bodyBackgroundColor", 0)
self.colors = self.get_colors()
self.chat.bgColor = self.item.get("bodyBackgroundColor", 0)
self.chat.colors = self.get_colors()
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
def get_amountdata(self, item):
amountDisplayString = item["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
@@ -36,11 +36,12 @@ class LiveChatPaidMessageRenderer(BaseRenderer):
return amountDisplayString, symbol, amount
def get_colors(self):
item = self.item
colors = Colors()
colors.headerBackgroundColor = self.renderer.get("headerBackgroundColor", 0)
colors.headerTextColor = self.renderer.get("headerTextColor", 0)
colors.bodyBackgroundColor = self.renderer.get("bodyBackgroundColor", 0)
colors.bodyTextColor = self.renderer.get("bodyTextColor", 0)
colors.timestampColor = self.renderer.get("timestampColor", 0)
colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0)
colors.headerBackgroundColor = item.get("headerBackgroundColor", 0)
colors.headerTextColor = item.get("headerTextColor", 0)
colors.bodyBackgroundColor = item.get("bodyBackgroundColor", 0)
colors.bodyTextColor = item.get("bodyTextColor", 0)
colors.timestampColor = item.get("timestampColor", 0)
colors.authorNameTextColor = item.get("authorNameTextColor", 0)
return colors

View File

@@ -4,30 +4,30 @@ from .base import BaseRenderer
superchat_regex = re.compile(r"^(\D*)(\d{1,3}(,\d{3})*(\.\d*)*\b)$")
class Colors:
class Colors2:
pass
class LiveChatPaidStickerRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "superSticker")
def settype(self):
self.chat.type = "superSticker"
def get_snippet(self):
super().get_snippet()
amountDisplayString, symbol, amount = (
self.get_amountdata(self.renderer)
self.get_amountdata(self.item)
)
self.amountValue = amount
self.amountString = amountDisplayString
self.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
self.chat.amountValue = amount
self.chat.amountString = amountDisplayString
self.chat.currency = currency.symbols[symbol]["fxtext"] if currency.symbols.get(
symbol) else symbol
self.bgColor = self.renderer.get("backgroundColor", 0)
self.sticker = "".join(("https:",
self.renderer["sticker"]["thumbnails"][0]["url"]))
self.colors = self.get_colors()
self.chat.bgColor = self.item.get("backgroundColor", 0)
self.chat.sticker = "".join(("https:",
self.item["sticker"]["thumbnails"][0]["url"]))
self.chat.colors = self.get_colors()
def get_amountdata(self, renderer):
amountDisplayString = renderer["purchaseAmountText"]["simpleText"]
def get_amountdata(self, item):
amountDisplayString = item["purchaseAmountText"]["simpleText"]
m = superchat_regex.search(amountDisplayString)
if m:
symbol = m.group(1)
@@ -38,9 +38,10 @@ class LiveChatPaidStickerRenderer(BaseRenderer):
return amountDisplayString, symbol, amount
def get_colors(self):
colors = Colors()
colors.moneyChipBackgroundColor = self.renderer.get("moneyChipBackgroundColor", 0)
colors.moneyChipTextColor = self.renderer.get("moneyChipTextColor", 0)
colors.backgroundColor = self.renderer.get("backgroundColor", 0)
colors.authorNameTextColor = self.renderer.get("authorNameTextColor", 0)
item = self.item
colors = Colors2()
colors.moneyChipBackgroundColor = item.get("moneyChipBackgroundColor", 0)
colors.moneyChipTextColor = item.get("moneyChipTextColor", 0)
colors.backgroundColor = item.get("backgroundColor", 0)
colors.authorNameTextColor = item.get("authorNameTextColor", 0)
return colors

View File

@@ -2,5 +2,5 @@ from .base import BaseRenderer
class LiveChatTextMessageRenderer(BaseRenderer):
def __init__(self, item):
super().__init__(item, "textMessage")
def settype(self):
self.chat.type = "textMessage"

View File

@@ -1,10 +1,13 @@
import httpx
import os
import re
import httpx
import time
from base64 import standard_b64encode
from concurrent.futures import ThreadPoolExecutor
from .chat_processor import ChatProcessor
from .default.processor import DefaultProcessor
from ..exceptions import UnknownConnectionError
import tempfile
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
@@ -43,20 +46,24 @@ class HTMLArchiver(ChatProcessor):
'''
HTMLArchiver saves chat data as HTML table format.
'''
def __init__(self, save_path):
def __init__(self, save_path, callback=None):
super().__init__()
self.client = httpx.Client(http2=True)
self.save_path = self._checkpath(save_path)
self.processor = DefaultProcessor()
self.emoji_table = {} # tuble for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.header = [HEADER_HTML]
self.body = ['<body>\n', '<table class="css">\n', self._parse_table_header(fmt_headers)]
self.emoji_table = {} # dict for custom emojis. key: emoji_id, value: base64 encoded image binary.
self.callback = callback
self.executor = ThreadPoolExecutor(max_workers=10)
self.tmp_fp = tempfile.NamedTemporaryFile(mode="a", encoding="utf-8", delete=False)
self.tmp_filename = self.tmp_fp.name
self.counter = 0
def _checkpath(self, filepath):
splitter = os.path.splitext(os.path.basename(filepath))
body = splitter[0]
extention = splitter[1]
newpath = filepath
counter = 0
counter = 1
while os.path.exists(newpath):
match = re.search(PATTERN, body)
if match:
@@ -76,21 +83,26 @@ class HTMLArchiver(ChatProcessor):
save_path : str :
Actual save path of file.
total_lines : int :
count of total lines written to the file.
Count of total lines written to the file.
"""
if chat_components is None or len(chat_components) == 0:
return
self.body.extend(
(self._parse_html_line((
c.datetime,
c.elapsedTime,
c.author.name,
self._parse_message(c.messageEx),
c.amountString,
c.author.type,
c.author.channelId)
) for c in self.processor.process(chat_components).items)
)
return self.save_path ,self.counter
for c in self.processor.process(chat_components).items:
self.tmp_fp.write(
self._parse_html_line((
c.datetime,
c.elapsedTime,
c.author.name,
self._parse_message(c.messageEx),
c.amountString,
c.author.type,
c.author.channelId)
)
)
if self.callback:
self.callback(None, 1)
self.counter += 1
return self.save_path, self.counter
def _parse_html_line(self, raw_line):
return ''.join(('<tr>',
@@ -108,13 +120,23 @@ class HTMLArchiver(ChatProcessor):
for item in message_items)
def _encode_img(self, url):
resp = httpx.get(url)
err = None
for _ in range(5):
try:
resp = self.client.get(url, timeout=30)
break
except httpx.HTTPError as e:
err = e
time.sleep(3)
else:
raise UnknownConnectionError(str(err))
return standard_b64encode(resp.content).decode()
def _set_emoji_table(self, item: dict):
emoji_id = item['id']
emoji_id = ''.join(('Z', item['id'])) if 48 <= ord(item['id'][0]) <= 57 else item['id']
if emoji_id not in self.emoji_table:
self.emoji_table.setdefault(emoji_id, self._encode_img(item['url']))
self.emoji_table.setdefault(emoji_id, self.executor.submit(self._encode_img, item['url']))
return emoji_id
def _stylecode(self, name, code, width, height):
@@ -125,13 +147,24 @@ class HTMLArchiver(ChatProcessor):
def _create_styles(self):
return '\n'.join(('<style type="text/css">',
TABLE_CSS,
'\n'.join(self._stylecode(key, self.emoji_table[key], 24, 24)
'\n'.join(self._stylecode(key, self.emoji_table[key].result(), 24, 24)
for key in self.emoji_table.keys()),
'</style>\n'))
def finalize(self):
self.header.extend([self._create_styles(), '</head>\n'])
self.body.extend(['</table>\n</body>'])
with open(self.save_path, mode='a', encoding='utf-8') as f:
f.writelines(self.header)
f.writelines(self.body)
if self.tmp_fp:
self.tmp_fp.flush()
self.tmp_fp = None
with open(self.save_path, mode='w', encoding='utf-8') as outfile:
# write header
outfile.writelines((
HEADER_HTML, self._create_styles(), '</head>\n',
'<body>\n', '<table class="css">\n',
self._parse_table_header(fmt_headers)))
# write body
fp = open(self.tmp_filename, mode="r", encoding="utf-8")
for line in fp:
outfile.write(line)
outfile.write('</table>\n</body>\n</html>')
fp.close()
os.remove(self.tmp_filename)

View File

@@ -1,5 +1,6 @@
import httpx
import asyncio
import httpx
import socket
from . import parser
from . block import Block
from . worker import ExtractWorker
@@ -11,11 +12,15 @@ from concurrent.futures import CancelledError
from json import JSONDecodeError
from urllib.parse import quote
headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay/" \
"get_live_chat_replay?continuation="
MAX_RETRY_COUNT = 3
# Set to avoid duplicate parameters
param_set = set()
def _split(start, end, count, min_interval_sec=120):
"""
@@ -50,6 +55,7 @@ def _split(start, end, count, min_interval_sec=120):
def ready_blocks(video_id, duration, div, callback):
param_set.clear()
if div <= 0:
raise ValueError
@@ -62,16 +68,24 @@ def ready_blocks(video_id, duration, div, callback):
async def _create_block(session, video_id, seektime, callback):
continuation = arcparam.getparam(video_id, seektime=seektime)
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
err = None
for _ in range(MAX_RETRY_COUNT):
try:
resp = await session.get(url, headers=headers)
if continuation in param_set:
next_continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=headers, timeout=10)
next_continuation, actions = parser.parse(resp.json())
break
except JSONDecodeError:
await asyncio.sleep(3)
except httpx.HTTPError as e:
err = e
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
raise UnknownConnectionError("Abort:" + str(err))
if actions:
first = parser.get_offset(actions[0])
@@ -110,16 +124,27 @@ def fetch_patch(callback, blocks, video_id):
async def _fetch(continuation, session) -> Patch:
url = f"{REPLAY_URL}{quote(continuation)}&pbj=1"
err = None
for _ in range(MAX_RETRY_COUNT):
try:
if continuation in param_set:
continuation, actions = None, []
break
param_set.add(continuation)
resp = await session.get(url, headers=config.headers)
continuation, actions = parser.parse(resp.json())
break
except JSONDecodeError:
await asyncio.sleep(3)
except httpx.HTTPError as e:
err = e
await asyncio.sleep(3)
except socket.error as error:
print("socket error", error.errno)
await asyncio.sleep(3)
else:
cancel()
raise UnknownConnectionError("Abort: Unknown connection error.")
raise UnknownConnectionError("Abort:" + str(err))
if actions:
last = parser.get_offset(actions[-1])
@@ -140,15 +165,10 @@ def fetch_patch(callback, blocks, video_id):
async def _shutdown():
print("\nshutdown...")
tasks = [t for t in asyncio.all_tasks()
if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
def cancel():

View File

@@ -1,3 +1,4 @@
from typing import Generator
from . import asyncdl
from . import duplcheck
from .. videoinfo import VideoInfo
@@ -60,11 +61,10 @@ class Extractor:
self.blocks = duplcheck.remove_duplicate_tail(self.blocks)
return self
def _combine(self):
ret = []
def _get_chatdata(self) -> Generator:
for block in self.blocks:
ret.extend(block.chat_data)
return ret
for chatdata in block.chat_data:
yield chatdata
def _execute_extract_operations(self):
return (
@@ -74,7 +74,7 @@ class Extractor:
._remove_overlap()
._download_blocks()
._remove_duplicate_tail()
._combine()
._get_chatdata()
)
def extract(self):

View File

@@ -42,10 +42,14 @@ def get_offset(item):
def get_id(item):
return list((list(item['replayChatItemAction']["actions"][0].values()
)[0])['item'].values())[0].get('id')
a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
if a:
return list(a.values())[0].get('id')
return None
def get_type(item):
return list((list(item['replayChatItemAction']["actions"][0].values()
)[0])['item'].keys())[0]
a = list(item['replayChatItemAction']["actions"][0].values())[0].get('item')
if a:
return list(a.keys())[0]
return None

View File

@@ -7,7 +7,6 @@ from typing import Tuple
class ExtractWorker:
"""
ExtractWorker associates a download session with a block.
When the worker finishes fetching, the block
being fetched is splitted and assigned the free worker.

View File

@@ -1,146 +0,0 @@
import httpx
import asyncio
import json
from . import parser
from . block import Block
from . worker import ExtractWorker
from . patch import Patch
from ... import config
from ... paramgen import arcparam_mining as arcparam
from concurrent.futures import CancelledError
from urllib.parse import quote
headers = config.headers
REPLAY_URL = "https://www.youtube.com/live_chat_replay?continuation="
INTERVAL = 1
def _split(start, end, count, min_interval_sec=120):
    """
    Cut the span from `start` to `end` into at most `count` equal pieces
    and return the offset at which every piece begins.

    When the requested pieces would be shorter than `min_interval_sec`,
    `count` is lowered (down to a minimum of 1) so that each piece is
    at least that long.

    Returns:
    --------
        Sorted list of the distinct starting offsets. Offsets are truncated
        to int, except in the single-piece case where `start` itself is
        returned unchanged.

    Raises:
    -------
        ValueError : when start/end are not numbers, count is not an int,
        start is greater than end, or count is smaller than 1.
    """
    numeric = (int, float)
    if not isinstance(start, numeric) or not isinstance(end, numeric):
        raise ValueError("start/end must be int or float")
    if not isinstance(count, int):
        raise ValueError("count must be int")
    if start > end:
        raise ValueError("end must be equal to or greater than start.")
    if count < 1:
        raise ValueError("count must be equal to or greater than 1.")

    span = end - start
    if span / count < min_interval_sec:
        # Too many pieces requested: shrink the count, but keep at least one.
        count = int(span / min_interval_sec) or 1
    if count == 1:
        return [start]

    step = span / count
    # A set removes offsets that collapse to the same int after truncation.
    return sorted({int(start + step * index) for index in range(count)})
def ready_blocks(video_id, duration, div, callback):
    """
    Fetch the initial blocks.

    The section from 0 to `duration` is divided with `_split()`, and one
    request is sent per starting offset. Every successful response becomes
    a `Block` holding the chat actions returned by `parser.parse()`.

    Parameter:
    ----------
    video_id : str
    duration : int or float
        upper bound handed to `_split()`.
    div : int
        requested number of blocks; must be greater than 0.
    callback : func or None
        when given, called as `callback(actions, INTERVAL)` after each
        block is fetched.

    Returns:
    --------
        List with one entry per starting offset: a `Block`, or None when
        no response text was available for that offset.

    Raises:
    -------
        ValueError : when `div` is not greater than 0.
    """
    if div <= 0:
        raise ValueError("div must be greater than 0.")

    async def _get_blocks(video_id, duration, div, callback):
        # httpx has no `ClientSession`; its asynchronous client is
        # `AsyncClient`.
        async with httpx.AsyncClient() as session:
            tasks = [_create_block(session, video_id, seektime, callback)
                     for seektime in _split(0, duration, div)]
            return await asyncio.gather(*tasks)

    async def _create_block(session, video_id, seektime, callback):
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        # With httpx the request itself is awaited and `text` is a plain
        # property of the response (not a coroutine).
        resp = await session.get(url, headers=headers)
        chat_json = resp.text
        if chat_json is None:
            return None
        continuation, actions = parser.parse(json.loads(chat_json)[1])
        first = seektime
        seektime += INTERVAL
        if callback:
            callback(actions, INTERVAL)
        return Block(
            continuation=continuation,
            chat_data=actions,
            first=first,
            last=seektime,
            seektime=seektime
        )

    loop = asyncio.get_event_loop()
    blocks = loop.run_until_complete(
        _get_blocks(video_id, duration, div, callback))
    return blocks
def fetch_patch(callback, blocks, video_id):
    """
    Allocate workers and assign blocks.

    One `ExtractWorker` is created per block; all workers share a single
    HTTP client and run concurrently until each `worker.run()` returns.
    A `CancelledError` raised while the event loop is running is swallowed
    so that a cancellation ends the download quietly.

    Parameter:
    ----------
    callback : func or None
        when given, called as `callback(actions, INTERVAL)` after each
        fetch.
    blocks : list
        List of Block(s) handed to every worker.
    video_id : str
    """
    async def _allocate_workers():
        workers = [
            ExtractWorker(
                fetch=_fetch, block=block,
                blocks=blocks, video_id=video_id
            )
            for block in blocks
        ]
        # httpx has no `ClientSession`; its asynchronous client is
        # `AsyncClient`.
        async with httpx.AsyncClient() as session:
            tasks = [worker.run(session) for worker in workers]
            return await asyncio.gather(*tasks)

    async def _fetch(seektime, session) -> Patch:
        continuation = arcparam.getparam(video_id, seektime=seektime)
        url = (f"{REPLAY_URL}{quote(continuation)}&playerOffsetMs="
               f"{int(seektime*1000)}&hidden=false&pbj=1")
        # With httpx the request itself is awaited and `text` is a plain
        # property of the response (not a coroutine).
        resp = await session.get(url, headers=config.headers)
        chat_json = resp.text
        actions = []
        try:
            if chat_json is None:
                return Patch()
            continuation, actions = parser.parse(json.loads(chat_json)[1])
        except json.JSONDecodeError:
            # Undecodable response: report an empty patch and keep the
            # continuation that was used for this request.
            pass
        if callback:
            callback(actions, INTERVAL)
        return Patch(chats=actions, continuation=continuation,
                     seektime=seektime, last=seektime)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(_allocate_workers())
    except CancelledError:
        pass
async def _shutdown():
    """
    Cancel every task of the running loop except the one executing this
    coroutine, waiting for each task to finish after it is cancelled.
    """
    print("\nshutdown...")
    others = []
    for candidate in asyncio.all_tasks():
        if candidate is not asyncio.current_task():
            others.append(candidate)
    for pending in others:
        pending.cancel()
        try:
            await pending
        except asyncio.CancelledError:
            # Expected result of awaiting a cancelled task.
            pass
def cancel():
    """Schedule `_shutdown()` as a task on the current event loop."""
    asyncio.get_event_loop().create_task(_shutdown())

View File

@@ -1,62 +0,0 @@
from . import parser
class Block:
    """Block object represents something like a box
    to join chunk of chatdata.

    Parameter:
    ---------
    first : int :
        videoOffsetTimeMs of the first chat_data
        (chat_data[0])

    last : int :
        videoOffsetTimeMs of the last chat_data.
        (chat_data[-1])

        this value increases as fetching chatdata progresses.

    end : int :
        target videoOffsetTimeMs of last chat data for extract,
        equals to first videoOffsetTimeMs of next block.
        when extract worker reaches this offset, stop fetching.

    continuation : str :
        continuation param of last chat data.

    chat_data : list
        when omitted (or None), every block gets its own new empty list.

    done : bool :
        whether this block has been fetched.

    remaining : int :
        remaining data to extract.
        equals end - last (computed once, at construction time).

    is_last : bool :
        whether this block is the last one in blocklist.

    during_split : bool :
        whether this block is in the process of being split.
        while True, this block is excluded from duplicate split procedure.

    seektime : float :
        the last position of this block(seconds) already fetched.
    """

    __slots__ = ['first', 'last', 'end', 'continuation', 'chat_data',
                 'remaining', 'done', 'is_last', 'during_split', 'seektime']

    def __init__(self, first=0, last=0, end=0,
                 continuation='', chat_data=None, is_last=False,
                 during_split=False, seektime=None):
        self.first = first
        self.last = last
        self.end = end
        self.continuation = continuation
        # A literal `[]` default would be one list shared by all blocks
        # created without chat_data, and the list is extended in place.
        self.chat_data = [] if chat_data is None else chat_data
        self.done = False
        self.remaining = self.end - self.last
        self.is_last = is_last
        self.during_split = during_split
        self.seektime = seektime

View File

@@ -1,73 +0,0 @@
import re
from ... import config
from ... exceptions import (
ResponseContextError,
NoContents, NoContinuation)
logger = config.logger(__name__)
def parse(jsn):
    """
    Parse replay chat data and keep only the paid actions.

    Parameter:
    ----------
    jsn : dict
        JSON of replay chat data.

    Returns:
    ------
        continuation : str
            continuation param for the next request, or None when the
            response carries no replay continuation.
        actions : list
            the first inner action of every replay action whose item holds
            a "liveChatPaidMessageRenderer" or "liveChatPaidStickerRenderer".
            Empty when there is no continuation or no such action.

    Raises:
    -------
        ValueError : `jsn` is None.
        ResponseContextError : the response context reports errors.
        NoContents : the response has no 'continuationContents'.
        NoContinuation : the first continuation entry is None.
    """
    if jsn is None:
        raise ValueError("parameter JSON is None")
    if jsn['response']['responseContext'].get('errors'):
        raise ResponseContextError(
            'video_id is invalid or private/deleted.')
    contents = jsn["response"].get('continuationContents')
    if contents is None:
        raise NoContents('No chat data.')

    cont = contents['liveChatContinuation']['continuations'][0]
    if cont is None:
        raise NoContinuation('No Continuation')
    metadata = cont.get('liveChatReplayContinuationData')
    if metadata:
        continuation = metadata.get("continuation")
        if continuation:
            # A response may carry no 'actions' key at all.
            actions = contents['liveChatContinuation'].get('actions') or []
            paid = []
            for action in actions:
                inner = action["replayChatItemAction"]["actions"][0]
                # Some actions have no 'item'; they can never be paid
                # messages, so they are skipped rather than raising.
                renderers = list(inner.values())[0].get('item') or {}
                if (renderers.get("liveChatPaidMessageRenderer")
                        or renderers.get("liveChatPaidStickerRenderer")):
                    paid.append(inner)
            return continuation, paid
    return None, []
def get_offset(item):
    """Return the "videoOffsetTimeMsec" of a replay action as an int."""
    replay = item['replayChatItemAction']
    return int(replay["videoOffsetTimeMsec"])
def get_id(item):
    """
    Return the 'id' of the first renderer inside a replay action.

    Returns None when the first inner action has no (or an empty) 'item',
    instead of raising KeyError.
    """
    inner = list(item['replayChatItemAction']["actions"][0].values())[0]
    renderers = inner.get('item')
    if renderers:
        return list(renderers.values())[0].get('id')
    return None
def get_type(item):
    """
    Return the renderer name (first key of 'item') of a replay action.

    Returns None when the first inner action has no (or an empty) 'item',
    instead of raising KeyError.
    """
    inner = list(item['replayChatItemAction']["actions"][0].values())[0]
    renderers = inner.get('item')
    if renderers:
        return list(renderers.keys())[0]
    return None
# Captures the object literal assigned to window["ytInitialData"].
_REGEX_YTINIT = re.compile(
    "window\\[\"ytInitialData\"\\]\\s*=\\s*({.+?});\\s+")


def extract(text):
    """
    Return the first ytInitialData object literal found in `text`
    (converted with str()), or None when the pattern does not occur.
    """
    found = _REGEX_YTINIT.findall(str(text))
    return found[0] if found else None

View File

@@ -1,27 +0,0 @@
from . import parser
from . block import Block
from typing import NamedTuple
class Patch(NamedTuple):
    """
    Patch represents chunk of chat data
    which is fetched by asyncdl.fetch_patch._fetch().
    """
    # Chat actions carried by this patch.
    # NOTE(review): this default list object is the same for every Patch
    # created without `chats`; treat it as read-only.
    chats : list = []
    # Continuation param to store on the block this patch is applied to.
    continuation : str = None
    # Copied to block.last and block.seektime by set_patch().
    seektime : float = None
    # Not read by fill()/set_patch() in this module.
    first : int = None
    # Compared with block.end by fill() to decide whether fetching goes on.
    last : int = None
def fill(block:Block, patch:Patch):
    """
    Apply `patch` to `block` while the patch still lies before the end of
    the block; otherwise clear the block's continuation.
    """
    if not patch.last < block.end:
        # The patch is not before the block's end offset: clear the
        # continuation instead of applying the patch.
        block.continuation = None
    else:
        set_patch(block, patch)
def set_patch(block:Block, patch:Patch):
    """
    Append the chats of `patch` to `block` and move the block's
    continuation, `last` and `seektime` forward to those of the patch.
    """
    block.continuation = patch.continuation
    block.chat_data.extend(patch.chats)
    # Both position markers follow the patch's seektime.
    block.last = block.seektime = patch.seektime

View File

@@ -1,72 +0,0 @@
from . import asyncdl
from . import parser
from .. videoinfo import VideoInfo
from ... import config
from ... exceptions import InvalidVideoIdException
logger = config.logger(__name__)
headers=config.headers
class SuperChatMiner:
    """
    Runs the extraction pipeline for one video: prepare the initial
    blocks, set the end offset of every block, download them and join
    their chat data.

    Parameter:
    ----------
    video_id : str
    duration : int
        must be a positive integer.
    div : int
        number of blocks to start with; must be a positive integer and
        is capped at 10.
    callback : func or None
        passed through to the asyncdl functions.

    Raises:
    -------
    ValueError : `div` or `duration` is not a positive integer.
    """

    def __init__(self, video_id, duration, div, callback):
        if not isinstance(div, int) or div < 1:
            raise ValueError('div must be positive integer.')
        elif div > 10:
            div = 10
        if not isinstance(duration, int) or duration < 1:
            raise ValueError('duration must be positive integer.')
        self.video_id = video_id
        self.duration = duration
        self.div = div
        self.callback = callback
        self.blocks = []

    def _ready_blocks(self):
        """Fetch the initial blocks, dropping entries that are None."""
        blocks = asyncdl.ready_blocks(
            self.video_id, self.duration, self.div, self.callback)
        self.blocks = [block for block in blocks if block is not None]
        return self

    def _set_block_end(self):
        """Make every block end where the next one starts."""
        if not self.blocks:
            # Nothing was fetched; indexing blocks[-1] would raise.
            return self
        for current, following in zip(self.blocks, self.blocks[1:]):
            current.end = following.first
        self.blocks[-1].end = self.duration
        self.blocks[-1].is_last = True
        return self

    def _download_blocks(self):
        """Download the remaining chat data of all blocks."""
        asyncdl.fetch_patch(self.callback, self.blocks, self.video_id)
        return self

    def _combine(self):
        """Return the chat data of all blocks as one flat list."""
        return [chat for block in self.blocks for chat in block.chat_data]

    def extract(self):
        """Run the whole pipeline and return the combined chat data."""
        return (
            self._ready_blocks()
                ._set_block_end()
                ._download_blocks()
                ._combine()
        )
def extract(video_id, div = 1, callback = None, processor = None):
    """
    Extract the chat data collected by `SuperChatMiner` for `video_id`.

    Returns an empty list (after printing a notice) when the reported
    duration is 0. Without a `processor` the raw list is returned;
    otherwise the result of `processor.process()` is returned.
    An InvalidVideoIdException raised while reading the video info
    propagates to the caller unchanged.
    """
    duration = VideoInfo(video_id).get_duration()
    if duration == 0:
        print("video is live.")
        return []

    data = SuperChatMiner(video_id, duration, div, callback).extract()
    if processor is not None:
        component = {'video_id':None,'timeout':1,'chatdata' : (action
            for action in data)}
        return processor.process([component])
    return data
def cancel():
    """Cancel a running extraction by delegating to `asyncdl.cancel()`."""
    asyncdl.cancel()

View File

@@ -1,45 +0,0 @@
from . import parser
from . block import Block
from . patch import Patch, fill
from ... paramgen import arcparam
INTERVAL = 1
class ExtractWorker:
    """
    ExtractWorker ties one download session to one block and keeps
    fetching patches for that block until its continuation is cleared.

    Parameter
    ----------
    fetch : func :
        fetch coroutine function of asyncdl, awaited as
        `fetch(seektime, session)`

    block : Block :
        Block object that includes chat_data

    blocks : list :
        List of Block(s)

    video_id : str :

    parent_block : Block :
        the block from which current block was split
        (always None after construction)
    """

    __slots__ = ['block', 'fetch', 'blocks', 'video_id', 'parent_block']

    def __init__(self, fetch, block, blocks, video_id ):
        self.fetch = fetch
        self.block:Block = block
        self.blocks:list = blocks
        self.video_id:str = video_id
        self.parent_block:Block = None

    async def run(self, session):
        """Fetch and apply patches until the block has no continuation."""
        while self.block.continuation:
            position = self.block.seektime
            patch = await self.fetch(position, session)
            fill(self.block, patch)
            # Advance to the next position after the patch was applied.
            self.block.seektime += INTERVAL
        self.block.done = True

View File

@@ -1,13 +1,15 @@
import httpx
import json
import re
import httpx
import time
from .. import config
from ..exceptions import InvalidVideoIdException, PatternUnmatchError
from ..exceptions import InvalidVideoIdException, PatternUnmatchError, UnknownConnectionError
from ..util.extract_video_id import extract_video_id
headers = config.headers
pattern = re.compile(r"'PLAYER_CONFIG': ({.*}}})")
headers = config.headers
pattern = re.compile(r"['\"]PLAYER_CONFIG['\"]:\s*({.*})")
pattern2 = re.compile(r"yt\.setConfig\((\{[\s\S]*?\})\);")
item_channel_id = [
"videoDetails",
@@ -29,6 +31,10 @@ item_response = [
"embedded_player_response"
]
item_response2 = [
"PLAYER_VARS",
"embedded_player_response"
]
item_author_image = [
"videoDetails",
"embeddedPlayerOverlayVideoDetailsRenderer",
@@ -80,23 +86,61 @@ class VideoInfo:
def __init__(self, video_id):
self.video_id = extract_video_id(video_id)
text = self._get_page_text(self.video_id)
self._parse(text)
self.client = httpx.Client(http2=True)
self.new_pattern_text = False
err = None
for _ in range(3):
try:
text = self._get_page_text(self.video_id)
self._parse(text)
break
except (InvalidVideoIdException, UnknownConnectionError) as e:
raise e
except Exception as e:
err = e
time.sleep(2)
pass
else:
raise err
def _get_page_text(self, video_id):
url = f"https://www.youtube.com/embed/{video_id}"
resp = httpx.get(url, headers=headers)
resp.raise_for_status()
err = None
for _ in range(3):
try:
resp = self.client.get(url, headers=headers)
resp.raise_for_status()
break
except httpx.HTTPError as e:
err = e
time.sleep(3)
else:
raise UnknownConnectionError(str(err))
return resp.text
def _parse(self, text):
result = re.search(pattern, text)
if result is None:
raise PatternUnmatchError(text)
res = json.loads(result.group(1)[:-1])
response = self._get_item(res, item_response)
result = re.search(pattern2, text)
if result is None:
raise PatternUnmatchError(doc=text)
else:
self.new_pattern_text = True
decoder = json.JSONDecoder()
if self.new_pattern_text:
res = decoder.raw_decode(result.group(1))[0]
else:
res = decoder.raw_decode(result.group(1)[:-1])[0]
if self.new_pattern_text:
response = self._get_item(res, item_response2)
else:
response = self._get_item(res, item_response)
if response is None:
self._check_video_is_private(res.get("args"))
if self.new_pattern_text:
self._check_video_is_private(res.get("PLAYER_VARS"))
else:
self._check_video_is_private(res.get("args"))
self._renderer = self._get_item(json.loads(response), item_renderer)
if self._renderer is None:
raise InvalidVideoIdException(

View File

@@ -1,8 +1,12 @@
import datetime
import httpx
import json
import datetime
import os
import re
from .. import config
PATTERN = re.compile(r"(.*)\(([0-9]+)\)$")
def extract(url):
_session = httpx.Client(http2=True)
@@ -12,7 +16,26 @@ def extract(url):
json.dump(html.json(), f, ensure_ascii=False)
def save(data, filename, extention):
with open(filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention,
mode='w', encoding='utf-8') as f:
def save(data, filename, extention) -> str:
save_filename = filename + "_" + (datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + extention
with open(save_filename ,mode='w', encoding='utf-8') as f:
f.writelines(data)
return save_filename
def checkpath(filepath):
    """
    Return a path that does not exist yet, derived from `filepath`.

    `filepath` itself is returned when nothing exists there. Otherwise a
    "(n)" counter is appended to the file name (before the extension),
    or an existing trailing "(n)" is incremented, until the path is free.
    """
    stem, suffix = os.path.splitext(os.path.basename(filepath))
    directory = os.path.dirname(filepath)
    candidate = filepath
    while os.path.exists(candidate):
        numbered = re.search(PATTERN, stem)
        if numbered:
            # Name already ends with "(n)": bump the counter.
            stem = f'{numbered[1]}({int(numbered[2]) + 1})'
        else:
            # No counter yet: start at 1.
            stem = f'{stem}(1)'
        candidate = os.path.join(directory, stem + suffix)
    return candidate

View File

@@ -8,18 +8,21 @@ YT_VIDEO_ID_LENGTH = 11
def extract_video_id(url_or_id: str) -> str:
ret = ''
if '[' in url_or_id:
url_or_id = url_or_id.replace('[', '').replace(']', '')
if type(url_or_id) != str:
raise TypeError(f"{url_or_id}: URL or VideoID must be str, but {type(url_or_id)} is passed.")
if len(url_or_id) == YT_VIDEO_ID_LENGTH:
return url_or_id
match = re.search(PATTERN, url_or_id)
if match is None:
raise InvalidVideoIdException(url_or_id)
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
try:
ret = match.group(4)
except IndexError:
raise InvalidVideoIdException(url_or_id)
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
if ret is None or len(ret) != YT_VIDEO_ID_LENGTH:
raise InvalidVideoIdException(url_or_id)
raise InvalidVideoIdException(f"Invalid video id: {url_or_id}")
return ret

View File

@@ -1,4 +1,4 @@
httpx==0.14.1
protobuf==3.13.0
httpx[http2]
protobuf==3.14.0
pytz
urllib3

View File

@@ -1,4 +1,2 @@
mock
mocker
pytest
pytest_httpx
pytest-mock
pytest-httpx

View File

@@ -1,41 +0,0 @@
from pytchat.tool.mining import parser
import pytchat.config as config
import httpx
import json
from pytchat.paramgen import arcparam_mining as arcparam
def test_arcparam_e(mocker):
    '''A negative seektime must be rejected with ValueError.'''
    rejected = False
    try:
        arcparam.getparam("01234567890", -1)
    except ValueError:
        rejected = True
    assert rejected
def test_arcparam_0(mocker):
    '''seektime 0 passed positionally.'''
    expected = "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
    assert arcparam.getparam("01234567890", 0) == expected
def test_arcparam_1(mocker):
    '''seektime passed by keyword.'''
    expected = "op2w0wQzGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABWgUQgMLXL2AEcgIIAXgB"
    param = arcparam.getparam("01234567890", seektime=100000)
    print(param)
    assert param == expected
def test_arcparam_2(mocker):
    '''Request replay chat with a generated param and check the first id.'''
    param = arcparam.getparam("PZz9NB0-Z64", 1)
    url = f"https://www.youtube.com/live_chat_replay?continuation={param}&playerOffsetMs=1000&pbj=1"
    client = httpx.Client(http2=True)
    resp = client.get(url, headers=config.headers)
    payload = json.loads(resp.text)
    _, chatdata = parser.parse(payload[1])
    renderer = chatdata[0]["addChatItemAction"]["item"]["liveChatPaidMessageRenderer"]
    test_id = renderer["id"]
    print(test_id)
    assert test_id == "ChwKGkNKSGE0YnFJeWVBQ0ZWcUF3Z0VkdGIwRm9R"
def test_arcparam_3(mocker):
    '''seektime omitted.'''
    expected = "op2w0wQsGiBDZzhhRFFvTE1ERXlNelExTmpjNE9UQWdBUSUzRCUzREABYARyAggBeAE%3D"
    assert arcparam.getparam("01234567890") == expected

View File

@@ -1,8 +1,17 @@
import json
from datetime import datetime
from pytchat.parser.live import Parser
from pytchat.processors.default.processor import DefaultProcessor
TEST_TIMETSTAMP = 1570678496000000
def get_local_datetime(timestamp):
    '''Format a microsecond timestamp as 'YYYY-mm-dd HH:MM:SS' local time.'''
    seconds = timestamp / 1000000
    return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')
def test_textmessage(mocker):
'''text message'''
processor = DefaultProcessor()
@@ -17,11 +26,10 @@ def test_textmessage(mocker):
}
ret = processor.process([data]).items[0]
assert ret.chattype == "textMessage"
assert ret.id == "dummy_id"
assert ret.message == "dummy_message"
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.author.name == "author_name"
assert ret.author.channelId == "author_channel_id"
assert ret.author.channelUrl == "http://www.youtube.com/channel/author_channel_id"
@@ -47,13 +55,12 @@ def test_textmessage_replay_member(mocker):
}
ret = processor.process([data]).items[0]
assert ret.chattype == "textMessage"
assert ret.type == "textMessage"
assert ret.id == "dummy_id"
assert ret.message == "dummy_message"
assert ret.messageEx == ["dummy_message"]
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == "1:23:45"
assert ret.author.name == "author_name"
assert ret.author.channelId == "author_channel_id"
@@ -80,14 +87,12 @@ def test_superchat(mocker):
}
ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "superChat"
assert ret.type == "superChat"
assert ret.id == "dummy_id"
assert ret.message == "dummy_message"
assert ret.messageEx == ["dummy_message"]
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == ""
assert ret.amountValue == 800
assert ret.amountString == "¥800"
@@ -124,14 +129,12 @@ def test_supersticker(mocker):
}
ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "superSticker"
assert ret.type == "superSticker"
assert ret.id == "dummy_id"
assert ret.message == ""
assert ret.messageEx == []
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == ""
assert ret.amountValue == 200
assert ret.amountString == "¥200"
@@ -167,14 +170,12 @@ def test_sponsor(mocker):
}
ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "newSponsor"
assert ret.type == "newSponsor"
assert ret.id == "dummy_id"
assert ret.message == "新規メンバー"
assert ret.messageEx == ["新規メンバー"]
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == ""
assert ret.bgColor == 0
assert ret.author.name == "author_name"
@@ -202,14 +203,12 @@ def test_sponsor_legacy(mocker):
}
ret = processor.process([data]).items[0]
print(json.dumps(chatdata, ensure_ascii=False))
assert ret.chattype == "newSponsor"
assert ret.type == "newSponsor"
assert ret.id == "dummy_id"
assert ret.message == "新規メンバー / ようこそ、author_name"
assert ret.messageEx == ["新規メンバー / ようこそ、author_name"]
assert ret.timestamp == 1570678496000
assert ret.datetime == "2019-10-10 12:34:56"
assert ret.datetime == get_local_datetime(TEST_TIMETSTAMP)
assert ret.elapsedTime == ""
assert ret.bgColor == 0
assert ret.author.name == "author_name"

View File

@@ -1,7 +1,6 @@
from json.decoder import JSONDecodeError
from pytchat.tool.videoinfo import VideoInfo
from pytchat.exceptions import InvalidVideoIdException, PatternUnmatchError
from pytchat import util
from pytchat.exceptions import InvalidVideoIdException
def _open_file(path):
@@ -14,7 +13,7 @@ def _set_test_data(filepath, mocker):
response_mock = mocker.Mock()
response_mock.status_code = 200
response_mock.text = _text
mocker.patch('httpx.get').return_value = response_mock
mocker.patch('httpx.Client.get').return_value = response_mock
def test_archived_page(mocker):
@@ -32,7 +31,7 @@ def test_archived_page(mocker):
def test_live_page(mocker):
_set_test_data('tests/testdata/videoinfo/live_page.txt', mocker)
info = VideoInfo('__test_id__')
'''live page :duration = 0'''
'''live page: duration==0'''
assert info.get_duration() == 0
assert info.video_id == '__test_id__'
assert info.get_channel_name() == 'BGM channel'
@@ -86,5 +85,17 @@ def test_pattern_unmatch(mocker):
try:
_ = VideoInfo('__test_id__')
assert False
except PatternUnmatchError:
except JSONDecodeError:
assert True
def test_extradata_handling(mocker):
    '''Test case the extracted data are JSON lines.'''
    _set_test_data(
        'tests/testdata/videoinfo/extradata_page.txt', mocker)
    decode_error = None
    try:
        _ = VideoInfo('__test_id__')
    except JSONDecodeError as e:
        # Show the text that could not be decoded before failing.
        print(e.doc)
        decode_error = e
    assert decode_error is None

File diff suppressed because one or more lines are too long