Mirror of https://github.com/yt-dlp/yt-dlp (synced 2025-12-16 22:25:40 +07:00)

Compare commits: 2022.08.18 ... 2022.10.04 (168 commits)
Commit SHA1s:

```
57fb88093e 4e0511f27d 304ad45a9b 878eac3e2e 34859e4b32 143a2ccab3 1e0daeb314 7f5b3cb8b3
c53e5cf59f c7f540ea1e 12f153a827 0d887f273a 4d37720a0c dd4411aac2 1d77d8ce07 a057779d5e
7474e4531e d3a3d7f0cc 8671f995cc 4a61501db9 7244895bde 177662e0f2 f48ab881f6 eb2d9504b9
8a04054647 8b7fb8b60d a83333c432 573a98d6f0 af7a5eef2f 576faf00b2 81b6102d20 acf306d1f9
20a7304e4c 2e0f8d4f6e 7e378287c4 9cc5aed990 48f535f5f8 8dbad2a439 11398b922c dfea94f8f6
f1aae71568 a5642f2c4a 10e2eb4f81 c9eba8075f 9d69c4e4b4 292fdad297 c04cc2e28e 7a32c70d13
709ee21417 1fb53b946c 1dd18a8808 0a5095fe8d 0f60ba6e65 1534aba865 0ca0f88121 0500ee3d81
46a5b335e7 914491b8e0 ab029d7e92 0bd5a039ea 5c8b2ee9ec faf7863bb0 d42763a443 3c757d5ed2
f55523cfdd 32972518da 2e7675489f 80eb0bd9b9 4cca2eb1bf 1c09783f7a 163281178a 2fa669f759
8ca48a1a54 b27bc13af6 f7c5a5e967 fada8272b6 46d72cd2c7 19b4e59a1e dab284f80f 9665f15a96
2b24afa6d7 3166e6840c 8817a80d3a 5736d79172 fc2ba496fd 2b9d02167f 2314b4d89f 1060f82f89
22df97f9c5 9c935fbc72 deae7c1711 941e881e1f 0cb0fdbbfe 0831d95c46 c26f9b991a 0c0b78b273
3ffb2f5bea ae1035646a 1015ceeeaf 17ffed1842 be9c0884d7 48c8424bd9 7657ec7ed6 07a1250e0e
69082b38dc aa824dd10b a12d03e15d 1a7c9fad9f 3c7a276234 d6f8871964 5469a4ab11 2c475e48b5
7c6eb424d3 adba24d207 5d7c7d6569 d2c8aadf79 1ac7f46184 05deb747bb b505e8517a f2e9fa3ef7
50a399326f 1ff88b7aec 825d3ce386 92aa6d6883 b2a4db425b de49cdbe9d 9f9c85dda4 11734714c2
b86ca447ce f8c7ba9984 76f2bb175d f26af78a8a bfbecd1174 9bd13fe5bb 459262ac97 82ea226c61
da4db748fa e1eabd7beb d81ba7d491 5135ed3d4a c4b2df872d 224b5a35f7 50ac0e5416 e0992d5558
5e01315aa1 4e4982ab5b 89e4d86171 a1af516259 1d64a59547 ca7f8b8f31 164b03c486 e5458d1d88
b5e7a2e69d 2516cafb28 fd404bec7e fe7866d0ed 5314b52192 13db4e7b9e 07275b708b b85703d11a
992dc6b486 822d66e591 8d1ad6378f 2d1019542a b25cac650f 90a1df305b 0a6b4b82e9 1704c47ba8
```
**`.github/ISSUE_TEMPLATE/1_broken_site.yml`** (10 changes)

```diff
@@ -18,11 +18,11 @@ body:
 options:
 - label: I'm reporting a broken site
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
@@ -62,7 +62,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -70,8 +70,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
 validations:
```
**`.github/ISSUE_TEMPLATE/2_site_support_request.yml`**

```diff
@@ -18,11 +18,11 @@ body:
 options:
 - label: I'm reporting a new site support request
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
+- label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
@@ -74,7 +74,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -82,8 +82,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
 validations:
```
**`.github/ISSUE_TEMPLATE/3_site_feature_request.yml`**

```diff
@@ -18,7 +18,7 @@ body:
 options:
 - label: I'm requesting a site-specific feature
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
@@ -70,7 +70,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -78,8 +78,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
 validations:
```
**`.github/ISSUE_TEMPLATE/4_bug_report.yml`** (10 changes)

```diff
@@ -18,11 +18,11 @@ body:
 options:
 - label: I'm reporting a bug unrelated to a specific site
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
@@ -55,7 +55,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -63,8 +63,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
 validations:
```
**`.github/ISSUE_TEMPLATE/5_feature_request.yml`** (8 changes)

```diff
@@ -20,7 +20,7 @@ body:
 required: true
 - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
@@ -51,7 +51,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -59,7 +59,7 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
```
**`.github/ISSUE_TEMPLATE/6_question.yml`** (8 changes)

```diff
@@ -26,7 +26,7 @@ body:
 required: true
 - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
 required: true
-- label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
 required: true
@@ -57,7 +57,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -65,7 +65,7 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2022.08.19, Current version: 2022.08.19
-yt-dlp is up to date (2022.08.19)
+Latest version: 2022.10.04, Current version: 2022.10.04
+yt-dlp is up to date (2022.10.04)
 <more lines>
 render: shell
```
**`.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml`**

```diff
@@ -16,7 +16,7 @@ body:
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
```
**`.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml`**

```diff
@@ -16,7 +16,7 @@ body:
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
+- label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
```
**`.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml`** (2 changes)

```diff
@@ -16,7 +16,7 @@ body:
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
-- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+- label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
```
**`.github/workflows/build.yml`** (6 changes)

```diff
@@ -194,7 +194,7 @@ jobs:
 - name: Install Requirements
   run: |  # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
     python -m pip install --upgrade pip setuptools wheel py2exe
-    pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt
+    pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
 
 - name: Prepare
   run: |
@@ -230,7 +230,7 @@ jobs:
 - name: Install Requirements
   run: |
     python -m pip install --upgrade pip setuptools wheel
-    pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt
+    pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
 
 - name: Prepare
   run: |
@@ -264,7 +264,7 @@ jobs:
 - name: Make Update spec
   run: |
     echo "# This file is used for regulating self-update" >> _update_spec
-    echo "lock 2022.08.18.36 .+ Python 3.6" >> _update_spec
+    echo "lock 2022.07.18 .+ Python 3.6" >> _update_spec
 - name: Make SHA2-SUMS files
   run: |
     sha256sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-256SUMS
```
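The `Make Update spec` step above is what pins outdated installs: each `lock` line in `_update_spec` caps the version that matching clients will self-update to. A minimal sketch of the file this step produces, assuming the lock semantics used by yt-dlp's updater (the version string is taken from the diff above):

```
# Recreate _update_spec as the workflow step above does
cat > _update_spec << 'EOF'
# This file is used for regulating self-update
lock 2022.08.18.36 .+ Python 3.6
EOF
# Clients whose variant string matches ".+ Python 3.6" stop updating at the locked version
```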
**`.github/workflows/core.yml`** (2 changes)

```diff
@@ -10,7 +10,7 @@ jobs:
 matrix:
   os: [ubuntu-latest]
   # CPython 3.9 is in quick-test
-  python-version: ['3.6', '3.7', '3.10', 3.11-dev, pypy-3.6, pypy-3.7, pypy-3.8]
+  python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8]
   run-tests-ext: [sh]
 include:
   # atleast one of each CPython/PyPy tests must be in windows
```
**`.github/workflows/download.yml`** (2 changes)

```diff
@@ -25,7 +25,7 @@ jobs:
 fail-fast: true
 matrix:
   os: [ubuntu-latest]
-  python-version: ['3.6', '3.7', '3.10', 3.11-dev, pypy-3.6, pypy-3.7, pypy-3.8]
+  python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8]
   run-tests-ext: [sh]
 include:
   # atleast one of each CPython/PyPy tests must be in windows
```
**`.gitignore`** (5 changes)

```diff
@@ -33,13 +33,14 @@ cookies
 *.jpeg
 *.jpg
 *.m4a
-*.mpga
 *.m4v
 *.mhtml
 *.mkv
 *.mov
 *.mp3
 *.mp4
+*.mpga
+*.oga
 *.ogg
 *.opus
 *.png
@@ -47,6 +48,7 @@ cookies
 *.srt
 *.swf
 *.swp
+*.tt
 *.ttml
 *.url
 *.vtt
@@ -85,6 +87,7 @@ updates_key.pem
 .tox
 *.class
+*.isorted
 *.stackdump
 
 # Generated
 AUTHORS
```
**`CONTRIBUTING.md`**

```diff
@@ -161,7 +161,7 @@ ## Adding new feature or making overarching changes
 
 ## Adding support for a new site
 
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](https://www.github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. yt-dlp does **not support** such sites thus pull requests adding support for them **will be rejected**.
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#is-the-website-primarily-used-for-piracy)**. yt-dlp does **not support** such sites thus pull requests adding support for them **will be rejected**.
 
 After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
 
```
**`CONTRIBUTORS`** (32 changes)

```diff
@@ -299,3 +299,35 @@ bashonly
 jacobtruman
 masta79
 palewire
+cgrigis
+DavidH-2022
+dfaker
+jackyyf
+ohaiibuzzle
+SamantazFox
+shreyasminocha
+tejasa97
+xenov
+satan1st
+0xGodspeed
+5736d79
+587021c
+basrieter
+Bobscorn
+CNugteren
+columndeeply
+DoubleCouponDay
+Fabi019
+GautamMKGarg
+Grub4K
+itachi-19
+jeroenj
+josanabr
+LiviaMedeiros
+nikita-moor
+snapdgn
+SuperSonicHub1
+tannertechnology
+Timendum
+tobi1805
+TokyoBlackHole
```
**`Changelog.md`** (155 changes)

```diff
@@ -11,11 +11,158 @@ # Instuctions for creating release
 -->
 
 
-### 2022.08.18.36
+### 2022.10.04
 
+* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly)
+* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore)
+* Allow plugin extractors to replace the built-in ones
+* Don't download entire video when no matching `--download-sections`
+* Fix `--config-location -`
+* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad)
+* Fix for when playlists don't have `webpage_url`
+* Support environment variables in `--ffmpeg-location`
+* Workaround `libc_ver` not be available on Windows Store version of Python
+* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan)
+* [outtmpl] Make `%s` work in strfformat for all systems
+* [jsinterp] Workaround operator associativity issue
+* [cookies] Let `_get_mac_keyring_password` fail gracefully
+* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K)
+* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0)
+* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move)
+* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo)
+* [extractor] Fix `fatal=False` in `RetryManager`
+* [extractor] Improve json-ld extraction
+* [extractor] Make `_search_json` able to parse lists
+* [extractor] Escape `%` in `representation_id` of m3u8
+* [extractor/generic] Pass through referer from json-ld
+* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0)
+* [utils] `js_to_json`: Improve
+* [utils] `Popen.run`: Fix default return in binary mode
+* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K)
+* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr)
+* [docs] Misc Improvements
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191)
+* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
+* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj)
+* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn)
+* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/MicrosoftEmbed] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly)
+* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply)
+* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor)
+* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies)
+* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Detect `lazy-load-for-videos` embeds
+* [extractor/youtube] Do not warn on duplicate chapters
+* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Improve continuation items extraction
+* [extractor/youtube:tab] Support `reporthistory` page
+* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole)
+* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly)
+* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K)
+* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen)
+* [extractor/bandcamp] Extract `uploader_url`
+* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BiliIntlSeries] Fix `_VALID_URL`
+* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed)
+* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K)
+* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz)
+* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum)
+* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros)
+* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn)
+* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan)
+* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05)
+* [extractor/JWPlatform] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/MLBTV] Detect live streams
+* [extractor/motorsport] Support native embeds
+* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19)
+* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology)
+* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/patreon] Sort formats
+* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968)
+* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1)
+* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de)
+* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly)
+* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly)
+* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid)
+* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805)
+* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE`
+* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly)
+* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K)
+* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz)
+
+
+### 2022.09.01
+
+* Add option `--use-extractors`
+* Merge youtube-dl: Upto [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7)
+* Add yt-dlp version to infojson
+* Fix `--break-per-url --max-downloads`
+* Fix bug in `--alias`
+* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [downloader/external] Smarter detection of executable
+* [extractor/generic] Don't return JW player without formats
+* [FormatSort] Fix `aext` for `--prefer-free-formats`
+* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0)
+* [cache] Mechanism to invalidate old cache
+* [utils] Add `deprecation_warning`
+* [utils] Add `orderedSet_from_options`
+* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore)
+* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st)
+* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev)
+* [test] Fix `test_youtube_signature`
+* [cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan)
+* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97)
+* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly)
+* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] Improvements to nsig extraction
+* [extractor/youtube] Fix bug in format sorting
+* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox)
+* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis)
+* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf)
+* [extractor/BiliBiliSearch] Don't sort by date
+* [extractor/BiliBiliSearch] Fix infinite loop
+* [extractor/bitchute] Mark errors as expected
+* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1)
+* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle)
+* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova)
+* [extractor/mediaset] Fix embed extraction
+* [extractor/mixcloud] All formats are audio-only
+* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0)
+* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha)
+* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker)
+* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [extractor/vimeo:user] Fix `_VALID_URL`
+
+* Revert "[ffmpeg] Set `ffmpeg_location` in a contextvar"
+* Revert "Remove Python 3.6 support"
+* Use `compat.re`
+
 
 ### 2022.08.19
 
```
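As a concrete illustration of the headline 2022.10.04 entry, just-ended (`post_live`) streams can now be captured from the beginning; a sketch assuming this goes through the usual `--live-from-start` switch (the URL is a placeholder):

```
# --live-from-start now also applies to post_live videos (streams that recently ended)
yt-dlp --live-from-start "https://www.youtube.com/watch?v=XXXXXXXXXXX"
```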
**`Makefile`** (14 changes)

```diff
@@ -17,8 +17,8 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
 clean-test:
     rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
     *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
-    *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.mpga *.m4v *.mhtml *.mkv *.mov \
-    *.mp3 *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
+    *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
+    *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
 clean-dist:
     rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
     yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
@@ -33,7 +33,6 @@ completion-zsh: completions/zsh/_yt-dlp
 lazy-extractors: yt_dlp/extractor/lazy_extractors.py
 
 PREFIX ?= /usr/local
-DESTDIR ?= .
 BINDIR ?= $(PREFIX)/bin
 MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
@@ -75,17 +74,16 @@ offlinetest: codetest
     $(PYTHON) -m pytest -k "not download"
 
 # XXX: This is hard to maintain
-CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat \
-    yt_dlp/extractor/anvato_token_generator
+CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat
 yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
     mkdir -p zip
     for d in $(CODE_FOLDERS) ; do \
       mkdir -p zip/$$d ;\
      cp -pPR $$d/*.py zip/$$d/ ;\
     done
-    touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py zip/yt_dlp/*/*/*.py
+    touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py
     mv zip/yt_dlp/__main__.py zip/
-    cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py yt_dlp/*/*/*.py __main__.py
+    cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py
     rm -rf zip
     echo '#!$(PYTHON)' > yt-dlp
     cat yt-dlp.zip >> yt-dlp
@@ -134,7 +132,7 @@ yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscrip
     $(PYTHON) devscripts/make_lazy_extractors.py $@
 
 yt-dlp.tar.gz: all
-    @tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
+    @tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
     --exclude '*.DS_Store' \
     --exclude '*.kate-swp' \
     --exclude '*.pyc' \
```
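The `yt-dlp` target above stages every package in `CODE_FOLDERS` into `zip/`, zips it, and prepends a `#!$(PYTHON)` line so the archive is directly executable. A minimal sketch of the build from a repository checkout, assuming GNU make and `zip` are installed:

```
make yt-dlp        # builds the executable zip described above
./yt-dlp --version
make yt-dlp.tar.gz # source tarball, now written to the working directory rather than $(DESTDIR)
```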
**`README.md`** (283 changes)

```diff
@@ -3,7 +3,7 @@
 
 [](#readme)
 
-[](#release-files "Release")
+[](#installation "Installation")
 [](https://pypi.org/project/yt-dlp "PyPi")
 [](Collaborators.md#collaborators "Donate")
 [](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
```
```diff
@@ -25,6 +25,7 @@
 * [NEW FEATURES](#new-features)
 * [Differences in default behavior](#differences-in-default-behavior)
 * [INSTALLATION](#installation)
+* [Detailed instructions](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
 * [Update](#update)
 * [Release Files](#release-files)
 * [Dependencies](#dependencies)
```
```diff
@@ -47,9 +48,10 @@
 * [SponsorBlock Options](#sponsorblock-options)
 * [Extractor Options](#extractor-options)
 * [CONFIGURATION](#configuration)
 * [Configuration file encoding](#configuration-file-encoding)
 * [Authentication with .netrc file](#authentication-with-netrc-file)
+* [Notes about environment variables](#notes-about-environment-variables)
 * [OUTPUT TEMPLATE](#output-template)
 * [Output template and Windows batch files](#output-template-and-windows-batch-files)
 * [Output template examples](#output-template-examples)
 * [FORMAT SELECTION](#format-selection)
 * [Filtering Formats](#filtering-formats)
```
```diff
@@ -65,15 +67,16 @@
 * [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
 * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
 * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
 * [MORE](#more)
+* [WIKI](https://github.com/yt-dlp/yt-dlp/wiki)
+* [FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ)
 <!-- MANPAGE: END EXCLUDED SECTION -->
 
 
 # NEW FEATURES
 
-* Merged with **youtube-dl v2021.12.17+ [commit/b0a60ce](https://github.com/ytdl-org/youtube-dl/commit/b0a60ce2032172aeaaf27fe3866ab72768f10cb2)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
 
-* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
+* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
 
 * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
 
```
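The two features read naturally together on the command line; a sketch with a placeholder URL, using `-S` fields from the README's "Sorting Formats" section and a standard SponsorBlock category:

```
# Prefer formats up to 1080p at the highest fps, and cut marked sponsor segments out of the file
yt-dlp -S "res:1080,fps" --sponsorblock-remove sponsor "https://www.youtube.com/watch?v=XXXXXXXXXXX"
```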
```diff
@@ -87,7 +90,7 @@ # NEW FEATURES
 * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
 * Redirect channel's home URL automatically to `/video` to preserve the old behaviour
 
-* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
+* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
 
 * **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
 
```
```diff
@@ -139,8 +142,9 @@ ### Differences in default behavior
 * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
 * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
 * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
-* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
-* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
+* YouTube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
+* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
+* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
 * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
 * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
 * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
```
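Several of these defaults can be reverted at once, since `--compat-options` takes a comma-separated list; a sketch with a placeholder playlist URL:

```
# Revert two of the behaviour changes listed above in one go
yt-dlp --compat-options no-live-chat,no-youtube-unavailable-videos "https://www.youtube.com/playlist?list=XXXXXXXXXX"
```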
````diff
@@ -157,76 +161,26 @@ ### Differences in default behavior
 
 # INSTALLATION
 
-You can install yt-dlp using one of the following methods:
-
-### Using the release binary
-
-You can simply download the [correct binary file](#release-files) for your OS
-
 <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 [](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
 [](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
 [](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
 [](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)
 [](https://pypi.org/project/yt-dlp)
 [](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
 [](#release-files)
 [](https://github.com/yt-dlp/yt-dlp/releases)
 <!-- MANPAGE: END EXCLUDED SECTION -->
 
-Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+You can install yt-dlp using [the binaries](#release-files), [PIP](https://pypi.org/project/yt-dlp) or one using a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions
 
-<!-- TODO: Move to Wiki -->
-In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways:
-
-```
-sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
-sudo chmod a+rx /usr/local/bin/yt-dlp
-```
-
-```
-sudo wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp
-sudo chmod a+rx /usr/local/bin/yt-dlp
-```
-
-```
-sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp --dir /usr/local/bin -o yt-dlp
-sudo chmod a+rx /usr/local/bin/yt-dlp
-```
-
-### With [PIP](https://pypi.org/project/pip)
-
-You can install the [PyPI package](https://pypi.org/project/yt-dlp) with:
-```
-python3 -m pip install -U yt-dlp
-```
-
-You can install without any of the optional dependencies using:
-```
-python3 -m pip install --no-deps -U yt-dlp
-```
-
-If you want to be on the cutting edge, you can also install the master branch with:
-```
-python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
-```
-
-On some systems, you may need to use `py` or `python` instead of `python3`
-
-<!-- TODO: Add to Wiki, Remove Taps -->
-### With [Homebrew](https://brew.sh)
-
-macOS or Linux users that are using Homebrew can also install it by:
-
-```
-brew install yt-dlp/taps/yt-dlp
-```
-
 ## UPDATE
-You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary)
+You can use `yt-dlp -U` to update if you are [using the release binaries](#release-files)
 
-If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program
+If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
 
-If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp`
+For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) or refer their documentation
 
 <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 ## RELEASE FILES
````
```diff
@@ -255,11 +209,14 @@ #### Misc
 
 File|Description
 :---|:---
-[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc
+[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball
 [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
 [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
 <!-- MANPAGE: END EXCLUDED SECTION -->
 
+
+Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
+
 ## DEPENDENCIES
 Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
 
```
```diff
@@ -295,7 +252,7 @@ ### Misc
 * [**secretstorage**](https://github.com/mitya57/secretstorage) - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
 * Any external downloader that you want to use with `--downloader`
 
-#### Deprecated
+### Deprecated
 
 * [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal)
 * [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
```
```diff
@@ -320,12 +277,12 @@ ### Standalone PyInstaller Builds
 On some systems, you may need to use `py` or `python` instead of `python3`.
 
-Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
+Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
 
 **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
 
 ### Platform-independent Binary (UNIX)
-You will need the build tools `python` (3.6+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
+You will need the build tools `python` (3.7+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
 
 After installing these, simply run `make`.
```
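For reference, the officially supported PyInstaller path goes through the repository's `pyinst.py` wrapper rather than a bare `pyinstaller` invocation; a minimal sketch, assuming a checkout with `requirements.txt` at the repository root:

```
python3 -m pip install -U pyinstaller -r requirements.txt
python3 pyinst.py   # builds the standalone binary for the host machine
```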
```diff
@@ -347,7 +304,7 @@ ### Related scripts
 * **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable
 * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
 
-You can also fork the project on github and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release
+You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release
 
 # USAGE AND OPTIONS
 
```
```diff
@@ -375,7 +332,13 @@ ## General Options:
 --list-extractors               List all supported extractors and exit
 --extractor-descriptions        Output descriptions of all supported
                                 extractors and exit
---force-generic-extractor       Force extraction to use the generic extractor
+--use-extractors NAMES          Extractor names to use separated by commas.
+                                You can also use regexes, "all", "default"
+                                and "end" (end URL matching); e.g. --ies
+                                "holodex.*,end,youtube". Prefix the name
+                                with a "-" to exclude it, e.g. --ies
+                                default,-generic. Use --list-extractors for
+                                a list of extractor names. (Alias: --ies)
 --default-search PREFIX         Use this prefix for unqualified URLs. E.g.
                                 "gvsearch2:python" downloads two videos from
                                 google videos for the search term "python".
```
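The help text above carries its own examples; spelled out as full commands with placeholder URLs:

```
# Try holodex extractors first, fall back to end-URL matching, then youtube
yt-dlp --ies "holodex.*,end,youtube" "https://holodex.net/watch/XXXXXXXX"
# Use the default extractors but never fall back to the generic one
yt-dlp --use-extractors default,-generic "https://example.com/video"
```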
@@ -524,8 +487,8 @@ ## Video Selection:
                                    a file that is in the archive
    --break-on-reject               Stop the download process when encountering
                                    a file that has been filtered out
    --break-per-input               Make --break-on-existing, --break-on-reject
                                    and --max-downloads act only on the current
    --break-per-input               --break-on-existing, --break-on-reject,
                                    --max-downloads, and autonumber resets per
                                    input URL
    --no-break-per-input            --break-on-existing and similar options
                                    terminate the entire download queue
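
A sketch of the per-input behavior described above (archive file and URLs hypothetical):

```bash
# Skip to the next input URL, rather than aborting the whole queue,
# once a video already present in the archive is encountered
yt-dlp --download-archive archive.txt --break-on-existing --break-per-input \
    "https://example.com/playlist1" "https://example.com/playlist2"
```
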
@@ -601,7 +564,7 @@ ## Download Options:
                                    Needs ffmpeg. This option can be used
                                    multiple times to download multiple
                                    sections, e.g. --download-sections
                                    "*10:15-15:00" --download-sections "intro"
                                    "*10:15-inf" --download-sections "intro"
    --downloader [PROTO:]NAME       Name or path of the external downloader to
                                    use (optionally) prefixed by the protocols
                                    (http, ftp, m3u8, dash, rtsp, rtmp, mms) to
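
For example, a sketch combining both forms of `--download-sections` mentioned in the help text (URL hypothetical):

```bash
# Download from 10:15 to the end, plus any chapter whose title matches "intro"
yt-dlp --download-sections "*10:15-inf" --download-sections "intro" \
    "https://example.com/video"
```
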
@@ -700,24 +663,25 @@ ## Filesystem Options:
                                    and dump cookie jar in
    --no-cookies                    Do not read/dump cookies from/to file
                                    (default)
    --cookies-from-browser BROWSER[+KEYRING][:PROFILE]
                                    The name of the browser and (optionally) the
                                    name/path of the profile to load cookies
                                    from, separated by a ":". Currently
                                    supported browsers are: brave, chrome,
                                    chromium, edge, firefox, opera, safari,
                                    vivaldi. By default, the most recently
                                    accessed profile is used. The keyring used
                                    for decrypting Chromium cookies on Linux can
                                    be (optionally) specified after the browser
                                    name separated by a "+". Currently supported
                                    keyrings are: basictext, gnomekeyring, kwallet
    --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]
                                    The name of the browser to load cookies
                                    from. Currently supported browsers are:
                                    brave, chrome, chromium, edge, firefox,
                                    opera, safari, vivaldi. Optionally, the
                                    KEYRING used for decrypting Chromium cookies
                                    on Linux, the name/path of the PROFILE to
                                    load cookies from, and the CONTAINER name
                                    (if Firefox) ("none" for no container) can
                                    be given with their respective separators.
                                    By default, all containers of the most
                                    recently accessed profile are used.
                                    Currently supported keyrings are: basictext,
                                    gnomekeyring, kwallet
    --no-cookies-from-browser       Do not load cookies from browser (default)
    --cache-dir DIR                 Location in the filesystem where youtube-dl
                                    can store some downloaded information (such
                                    as client ids and signatures) permanently.
                                    By default $XDG_CACHE_HOME/yt-dlp or
                                    ~/.cache/yt-dlp
    --cache-dir DIR                 Location in the filesystem where yt-dlp can
                                    store some downloaded information (such as
                                    client ids and signatures) permanently. By
                                    default ${XDG_CACHE_HOME}/yt-dlp
    --no-cache-dir                  Disable filesystem caching
    --rm-cache-dir                  Delete all filesystem cache files
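
A sketch of the extended `BROWSER[+KEYRING][:PROFILE][::CONTAINER]` syntax (profile and container names hypothetical):

```bash
# Cookies from the "Personal" container of the most recently used Firefox profile
yt-dlp --cookies-from-browser "firefox::Personal" "https://example.com/video"

# Chromium cookies from the "Default" profile, decrypted via the GNOME keyring (Linux)
yt-dlp --cookies-from-browser "chromium+gnomekeyring:Default" "https://example.com/video"
```
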
@@ -1116,29 +1080,34 @@ ## Extractor Options:
    --no-hls-split-discontinuity    Do not split HLS playlists to different
                                    formats at discontinuities such as ad breaks
                                    (default)
    --extractor-args KEY:ARGS       Pass these arguments to the extractor. See
                                    "EXTRACTOR ARGUMENTS" for details. You can
                                    use this option multiple times to give
    --extractor-args IE_KEY:ARGS    Pass ARGS arguments to the IE_KEY extractor.
                                    See "EXTRACTOR ARGUMENTS" for details. You
                                    can use this option multiple times to give
                                    arguments for different extractors

# CONFIGURATION

You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations:

1. **Main Configuration**: The file given by `--config-location`
1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. If you are running from source-code (`<root dir>/yt_dlp/__main__.py`), the root directory is used instead.
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
1. **Main Configuration**:
    * The file given by `--config-location`
1. **Portable Configuration**: (Recommended for portable installations)
    * If using a binary, `yt-dlp.conf` in the same directory as the binary
    * If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp`
1. **Home Configuration**:
    * `yt-dlp.conf` in the home path given by `-P`
    * If `-P` is not given, the current directory is searched
1. **User Configuration**:
    * `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS)
    * `%XDG_CONFIG_HOME%/yt-dlp.conf`
    * `%APPDATA%/yt-dlp/config` (recommended on Windows)
    * `%APPDATA%/yt-dlp/config.txt`
    * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
    * `${XDG_CONFIG_HOME}/yt-dlp.conf`
    * `${APPDATA}/yt-dlp/config` (recommended on Windows)
    * `${APPDATA}/yt-dlp/config.txt`
    * `~/yt-dlp.conf`
    * `~/yt-dlp.conf.txt`

    `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On Windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`

1. **System Configuration**: `/etc/yt-dlp.conf`
    See also: [Notes about environment variables](#notes-about-environment-variables)
1. **System Configuration**:
    * `/etc/yt-dlp.conf`

E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
```
# Lines starting with # are comments

# Always extract audio
-x

# Do not copy the mtime
--no-mtime

# Use this proxy
--proxy 127.0.0.1:3128

# Save all videos under YouTube directory in your home directory
-o ~/YouTube/%(title)s.%(ext)s
```

Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.

You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.

### Config file encoding
### Configuration file encoding

The config files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise.
The configuration files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise.

If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.

### Authentication with `.netrc` file

You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
```
touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc
touch ${HOME}/.netrc
chmod a-rwx,u+rw ${HOME}/.netrc
```
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
```
machine <extractor> login <username> password <password>
```

@@ -1185,7 +1154,14 @@ ### Authentication with `.netrc` file

To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).

The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it is `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`) or `%HOMEDRIVE%%HOMEPATH%`
The default location of the .netrc file is `~` (see below).

### Notes about environment variables
* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but are always shown as `${VARIABLE}` in this documentation
* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
* On Windows, `~` points to `${HOME}` if present; otherwise, to `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}`
* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
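
A minimal sketch of these rules in practice (path hypothetical):

```bash
# UNIX-style variables work in path-like options even on Windows
yt-dlp -o "${USERPROFILE}/Videos/%(title)s.%(ext)s" "https://example.com/video"
```
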
# OUTPUT TEMPLATE

@@ -1201,7 +1177,7 @@ # OUTPUT TEMPLATE

The field names themselves (the part inside the parentheses) can also have some special formatting:

1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. E.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that not all the fields that become available using this method are listed below. Use `-j` to see such fields

1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`

@@ -1209,9 +1185,9 @@ # OUTPUT TEMPLATE

1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`

1. **Replacement**: A replacement value can specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.

1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. E.g. `%(uploader|Unknown)s`
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`

1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)

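A few sketches of the formatting features above (URLs hypothetical):

```bash
# Object traversal with dict-building: pretty-printed JSON of format_id/height pairs
yt-dlp --print "%(formats.:.{format_id,height})#j" "https://example.com/video"

# Arithmetic on numeric fields: offset the playlist index by 10
yt-dlp -o "%(playlist_index+10)03d - %(title)s.%(ext)s" "https://example.com/playlist"

# Default value when a field is empty
yt-dlp -o "%(uploader|Unknown)s - %(title)s.%(ext)s" "https://example.com/video"
```
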
@@ -1229,7 +1205,6 @@ # OUTPUT TEMPLATE
- `id` (string): Video identifier
- `title` (string): Video title
- `fulltitle` (string): Video title ignoring live timestamp and generic title
- `url` (string): Video URL
- `ext` (string): Video filename extension
- `alt_title` (string): A secondary title of the video
- `description` (string): The description of the video
@@ -1264,26 +1239,6 @@ # OUTPUT TEMPLATE
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `format` (string): A human-readable description of the format
- `format_id` (string): Format code specified by `--format`
- `format_note` (string): Additional info about the format
- `width` (numeric): Width of the video
- `height` (numeric): Height of the video
- `resolution` (string): Textual description of width and height
- `tbr` (numeric): Average bitrate of audio and video in KBit/s
- `abr` (numeric): Average audio bitrate in KBit/s
- `acodec` (string): Name of the audio codec in use
- `asr` (numeric): Audio sampling rate in Hertz
- `vbr` (numeric): Average video bitrate in KBit/s
- `fps` (numeric): Frame rate
- `dynamic_range` (string): The dynamic range of the video
- `audio_channels` (numeric): The number of audio channels
- `stretched_ratio` (float): `width:height` of the video's pixels, if not square
- `vcodec` (string): Name of the video codec in use
- `container` (string): Name of the container format
- `filesize` (numeric): The number of bytes, if known in advance
- `filesize_approx` (numeric): An estimate for the number of bytes
- `protocol` (string): The protocol that will be used for the actual download
- `extractor` (string): Name of the extractor
- `extractor_key` (string): Key name of the extractor
- `epoch` (numeric): Unix epoch of when the information extraction was completed
@@ -1302,6 +1257,8 @@ # OUTPUT TEMPLATE
- `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)

All the fields in [Filtering Formats](#filtering-formats) can also be used

Available for the video that belongs to some logical chapter or section:

@@ -1374,22 +1331,16 @@ # OUTPUT TEMPLATE

In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
#### Output template and Windows batch files

If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
<!-- MANPAGE: END EXCLUDED SECTION -->

#### Output template examples

```bash
$ yt-dlp --get-filename -o "test video.%(ext)s" BaW_jenozKc
$ yt-dlp --print filename -o "test video.%(ext)s" BaW_jenozKc
test video.webm # Literal name with correct extension

$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters

$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.webm # Restricted file name

# Download YouTube playlist videos in separate directory indexed by video order in a playlist
@@ -1461,7 +1412,7 @@ # FORMAT SELECTION

You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.

If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.

If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.

@@ -1469,7 +1420,7 @@ # FORMAT SELECTION

**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to single audio/video

Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.

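For instance, sketches of the selection behaviors described above (URL hypothetical):

```bash
# Preference order: format 22, else 17, else 18
yt-dlp -f 22/17/18 "https://example.com/video"

# Merge the best video-only and best audio-only streams; fall back to best combined
yt-dlp -f "bv*+ba/b" "https://example.com/video"
```
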
## Filtering Formats

@@ -1478,6 +1429,7 @@ ## Filtering Formats
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):

- `filesize`: The number of bytes, if known in advance
- `filesize_approx`: An estimate for the number of bytes
- `width`: Width of the video, if known
- `height`: Height of the video, if known
- `tbr`: Average bitrate of audio and video in KBit/s
@@ -1485,16 +1437,23 @@ ## Filtering Formats
- `vbr`: Average video bitrate in KBit/s
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
- `audio_channels`: The number of audio channels
- `stretched_ratio`: `width:height` of the video's pixels, if not square

Filtering also works with the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:

- `url`: Video URL
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
- `language`: Language code
- `dynamic_range`: The dynamic range of the video
- `format_id`: A short description of the format
- `format`: A human-readable description of the format
- `format_note`: Additional info about the format
- `resolution`: Textual description of width and height

Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.

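A couple of filter sketches using the fields above (URL hypothetical):

```bash
# Best mp4 no taller than 720p
yt-dlp -f "best[height<=720][ext=mp4]" "https://example.com/video"

# Negated string comparison: any format whose protocol does not contain "dash"
yt-dlp -f "best[protocol!*=dash]" "https://example.com/video"
```
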
@@ -1510,8 +1469,8 @@ ## Sorting Formats

The available fields are:

- `hasvid`: Gives priority to formats that has a video stream
- `hasaud`: Gives priority to formats that has a audio stream
- `hasvid`: Gives priority to formats that have a video stream
- `hasaud`: Gives priority to formats that have an audio stream
- `ie_pref`: The format preference
- `lang`: The language preference
- `quality`: The quality of the format
@@ -1521,7 +1480,7 @@ ## Sorting Formats
- `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
- `codec`: Equivalent to `vcodec,acodec`
- `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`.
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
- `ext`: Equivalent to `vext,aext`
- `filesize`: Exact filesize, if known in advance
- `fs_approx`: Approximate filesize calculated from the manifests
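
For example, a sketch of sorting with these fields (URL hypothetical):

```bash
# Prefer the smallest file, using bitrate as a tie-breaker
yt-dlp -S "+size,+br" "https://example.com/video"

# Prefer 1080p or smaller, keeping the rest of the default sort order
yt-dlp -S "res:1080" "https://example.com/video"
```
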
@@ -1681,7 +1640,7 @@ # MODIFYING METADATA

The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.

Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.

This option also has a few special uses:

@@ -1727,11 +1686,11 @@ # Set title as "Series name S01E05"
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"

# Prioritize uploader as the "artist" field in video metadata
$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --add-metadata
$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata

# Set "comment" field in video metadata using description instead of webpage_url,
# handling multiple lines correctly
$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --add-metadata
$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata

# Do not set any "synopsis" in the video metadata
$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)"
@@ -1751,13 +1710,14 @@ # EXTRACTOR ARGUMENTS
The following extractors use this feature:

#### youtube
* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
    * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests

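For instance, a sketch combining several of the arguments above (values illustrative):

```bash
# Use only the android and web clients, skip DASH manifests,
# and cap comment extraction at 1000 comments
yt-dlp --extractor-args "youtube:player_client=android,web;skip=dash;max_comments=1000,all,all,all" \
    "https://example.com/video"
```
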
@@ -1766,16 +1726,16 @@ #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off

#### funimation
* `language`: Languages to extract, e.g. `funimation:language=english,japanese`
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
* `version`: The video version to extract - `uncut` or `simulcast`

#### crunchyroll
* `language`: Languages to extract, e.g. `crunchyroll:language=jaJp`
* `language`: Audio languages to extract, e.g. `crunchyroll:language=jaJp`
* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS`

#### crunchyrollbeta
* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
* `hardsub`: Preference order for which hardsub versions to extract (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`

#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
@@ -1819,6 +1779,8 @@ # PLUGINS

If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability

See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)


# EMBEDDING YT-DLP

@@ -2058,6 +2020,7 @@ #### Redundant options
#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same

    --force-generic-extractor       --ies generic,default
    --exec-before-download CMD      --exec "before_dl:CMD"
    --no-exec-before-download       --no-exec
    --all-formats                   -f all

@@ -2142,5 +2105,5 @@ #### Removed
# CONTRIBUTING
See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)

# MORE
For FAQ see the [youtube-dl README](https://github.com/ytdl-org/youtube-dl#faq)
# WIKI
See the [Wiki](https://github.com/yt-dlp/yt-dlp/wiki) for more information

@@ -11,14 +11,17 @@

# These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
_WARNED = False


class LazyLoadMetaClass(type):
    def __getattr__(cls, name):
        if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
            write_string(
                'WARNING: Falling back to normal extractor since lazy extractor '
                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
        global _WARNED
        if ('_real_class' not in cls.__dict__
                and name not in ALLOWED_CLASSMETHODS and not _WARNED):
            _WARNED = True
            write_string('WARNING: Falling back to normal extractor since lazy extractor '
                         f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
        return getattr(cls.real_class, name)

@@ -2,6 +2,7 @@

# Allow direct execution
import os
import shutil
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -12,7 +13,9 @@
from devscripts.utils import get_filename_args, read_file, write_file

NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
STATIC_CLASS_PROPERTIES = [
    'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
]
CLASS_METHODS = [
    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
]
@@ -48,12 +51,13 @@ def get_all_ies():
    PLUGINS_DIRNAME = 'ytdlp_plugins'
    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
    if os.path.exists(PLUGINS_DIRNAME):
        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
        # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
        shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
    try:
        from yt_dlp.extractor.extractors import _ALL_CLASSES
    finally:
        if os.path.exists(BLOCKED_DIRNAME):
            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
            shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
    return _ALL_CLASSES

@@ -1,13 +1,13 @@
#!/usr/bin/env sh

if [ -z $1 ]; then
if [ -z "$1" ]; then
    test_set='test'
elif [ $1 = 'core' ]; then
elif [ "$1" = 'core' ]; then
    test_set="-m not download"
elif [ $1 = 'download' ]; then
elif [ "$1" = 'download' ]; then
    test_set="-m download"
else
    echo 'Invalid test type "'$1'". Use "core" | "download"'
    echo 'Invalid test type "'"$1"'". Use "core" | "download"'
    exit 1
fi

@@ -16,7 +16,6 @@


def get_new_version(revision):
    return read_version()
    version = datetime.utcnow().strftime('%Y.%m.%d')

    if revision:
@@ -10,6 +10,14 @@ per_file_ignores =
    devscripts/lazy_load_template.py: F401


[autoflake]
ignore-init-module-imports = true
ignore-pass-after-docstring = true
remove-all-unused-imports = true
remove-duplicate-keys = true
remove-unused-variables = true


[tool:pytest]
addopts = -ra -v --strict-markers
markers =

setup.py

@@ -32,7 +32,6 @@ def packages():

    return [
        'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        'yt_dlp.extractor.anvato_token_generator',
    ]


@@ -126,7 +125,7 @@ def run(self):
    url='https://github.com/yt-dlp/yt-dlp',
    packages=packages(),
    install_requires=REQUIREMENTS,
    python_requires='>=3.6',
    python_requires='>=3.7',
    project_urls={
        'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
        'Source': 'https://github.com/yt-dlp/yt-dlp',
@@ -138,7 +137,6 @@ def run(self):
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',

@@ -3,11 +3,12 @@ # Supported sites
- **0000studio:clip**
- **17live**
- **17live:clip**
- **1News**: 1news.co.nz article videos
- **1tv**: Первый канал
- **20.detik.com**
- **20min**
- **23video**
- **247sports**
- **24tv.ua**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@@ -66,7 +67,6 @@ # Supported sites
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Angel**
- **AnimalPlanet**
- **AnimeOnDemand**: [<abbr title="netrc machine"><em>animeondemand</em></abbr>]
- **ant1newsgr:article**: ant1news.gr articles
- **ant1newsgr:embed**: ant1news.gr embedded videos
- **ant1newsgr:watch**: ant1news.gr videos
@@ -135,6 +135,7 @@ # Supported sites
- **BehindKink**
- **Bellator**
- **BellMedia**
- **BerufeTV**
- **Bet**
- **bfi:player**
- **bfmtv**
@@ -148,9 +149,11 @@ # Supported sites
- **Bilibili category extractor**
- **BilibiliAudio**
- **BilibiliAudioAlbum**
- **BilibiliChannel**
- **BiliBiliPlayer**
- **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
- **BilibiliSpaceAudio**
- **BilibiliSpacePlaylist**
- **BilibiliSpaceVideo**
- **BiliIntl**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
- **BiliIntlSeries**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
- **BiliLive**
@@ -168,6 +171,7 @@ # Supported sites
- **Bloomberg**
- **BokeCC**
- **BongaCams**
- **BooyahClips**
- **BostonGlobe**
- **Box**
- **Bpb**: Bundeszentrale für politische Bildung
@@ -180,6 +184,7 @@ # Supported sites
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **Bundesliga**
- **BusinessInsider**
- **BuzzFeed**
- **BYUtv**
@@ -248,6 +253,7 @@ # Supported sites
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CNNIndonesia**
- **ComedyCentral**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
@@ -304,6 +310,7 @@ # Supported sites
- **defense.gouv.fr**
- **democracynow**
- **DestinationAmerica**
- **DetikEmbed**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Digg**
- **DigitalConcertHall**: [<abbr title="netrc machine"><em>digitalconcerthall</em></abbr>] DigitalConcertHall extractor
@@ -364,6 +371,7 @@ # Supported sites
- **Engadget**
- **Epicon**
- **EpiconSeries**
- **Epoch**
- **Eporner**
- **EroProfile**: [<abbr title="netrc machine"><em>eroprofile</em></abbr>]
- **EroProfile:album**
@@ -377,6 +385,7 @@ # Supported sites
- **EsriVideo**
- **Europa**
- **EuropeanTour**
- **Eurosport**
- **EUScreen**
- **EWETV**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
- **EWETVLive**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
@@ -477,6 +486,7 @@ # Supported sites
- **google:podcasts:feed**
- **GoogleDrive**
- **GoogleDrive:Folder**
- **GoPlay**: [<abbr title="netrc machine"><em>goplay</em></abbr>]
- **GoPro**
- **Goshgay**
- **GoToStage**
@@ -526,11 +536,14 @@ # Supported sites
- **Hypem**
- **Hytale**
- **Icareus**
- **iflix:episode**
- **IflixSeries**
- **ign.com**
- **IGNArticle**
- **IGNVideo**
- **IHeartRadio**
- **iheartradio:podcast**
- **Iltalehti**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
@@ -553,6 +566,9 @@ # Supported sites
- **iq.com**: International version of iQiyi
- **iq.com:album**
- **iqiyi**: [<abbr title="netrc machine"><em>iqiyi</em></abbr>] 爱奇艺
- **IslamChannel**
- **IslamChannelSeries**
- **IsraelNationalNews**
- **ITProTV**
- **ITProTVCourse**
- **ITTF**
@@ -685,6 +701,7 @@ # Supported sites
- **Mediasite**
- **MediasiteCatalog**
- **MediasiteNamedCatalog**
- **MediaWorksNZVOD**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **megatvcom**: megatv.com videos
@@ -697,6 +714,7 @@ # Supported sites
- **mewatch**
- **Mgoon**
- **MiaoPai**
- **MicrosoftEmbed**
- **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
- **mildom:clip**: Clip in Mildom
@@ -796,6 +814,7 @@ # Supported sites
- **NBCSports**
- **NBCSportsStream**
- **NBCSportsVPlayer**
- **NBCStations**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
- **ndr:embed:base**
@@ -820,6 +839,7 @@ # Supported sites
- **Newgrounds**
- **Newgrounds:playlist**
- **Newgrounds:user**
- **NewsPicks**
- **Newstube**
- **Newsy**
- **NextMedia**: 蘋果日報
@@ -829,8 +849,8 @@ # Supported sites
- **NexxEmbed**
- **NFB**
- **NFHSNetwork**
- **nfl.com**: (**Currently broken**)
- **nfl.com:article**: (**Currently broken**)
- **nfl.com**
- **nfl.com:article**
- **NhkForSchoolBangumi**
- **NhkForSchoolProgramList**
- **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
@@ -1008,6 +1028,7 @@ # Supported sites
- **PornoVoisines**
- **PornoXO**
- **PornTube**
- **PrankCast**
- **PremiershipRugby**
- **PressTV**
- **ProjectVeritas**
@@ -1188,6 +1209,7 @@ # Supported sites
- **Slideshare**
- **SlidesLive**
- **Slutload**
- **Smotrim**
- **Snotr**
- **Sohu**
- **SonyLIV**: [<abbr title="netrc machine"><em>sonyliv</em></abbr>]
@@ -1217,8 +1239,8 @@ # Supported sites
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **spotify**: Spotify episodes
- **spotify:show**: Spotify shows
- **spotify**: Spotify episodes (**Currently broken**)
- **spotify:show**: Spotify shows (**Currently broken**)
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
@@ -1312,10 +1334,10 @@ # Supported sites
- **ThreeSpeak**
- **ThreeSpeakUser**
- **TikTok**
- **tiktok:effect**
- **tiktok:sound**
- **tiktok:tag**
- **tiktok:user**
- **tiktok:effect**: (**Currently broken**)
- **tiktok:sound**: (**Currently broken**)
- **tiktok:tag**: (**Currently broken**)
- **tiktok:user**: (**Currently broken**)
- **tinypic**: tinypic.com videos
- **TLC**
- **TMZ**
@@ -1331,6 +1353,8 @@ # Supported sites
- **ToypicsUser**: Toypics user profile
- **TrailerAddict**: (**Currently broken**)
- **TravelChannel**
- **Triller**: [<abbr title="netrc machine"><em>triller</em></abbr>]
- **TrillerUser**: [<abbr title="netrc machine"><em>triller</em></abbr>]
- **Trilulilu**
- **Trovo**
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
@@ -1354,6 +1378,7 @@ # Supported sites
- **Turbo**
- **tv.dfb.de**
- **TV2**
- **TV24UAGenericPassthrough**
- **TV2Article**
- **TV2DK**
- **TV2DKBornholmPlay**
@@ -1416,6 +1441,7 @@ # Supported sites
- **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**
- **UnscriptedNewsVideo**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@@ -1460,8 +1486,6 @@ # Supported sites
- **VidioLive**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
- **VidioPremier**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
- **VidLii**
- **vier**: [<abbr title="netrc machine"><em>vier</em></abbr>] vier.be and vijf.be
- **vier:videos**
- **viewlift**
- **viewlift:embed**
- **Viidea**
@@ -1506,6 +1530,8 @@ # Supported sites
- **VoxMedia**
- **VoxMediaVolume**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **vqq:series**
- **vqq:video**
- **Vrak**
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- **VrtNU**: [<abbr title="netrc machine"><em>vrtnu</em></abbr>] VrtNU.be
@@ -1555,8 +1581,10 @@ # Supported sites
- **Willow**
- **WimTV**
- **Wistia**
- **WistiaChannel**
- **WistiaPlaylist**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **wordpress:playlist**
- **WorldStarHipHop**
- **wppilot**
- **wppilot:channels**
@@ -1620,6 +1648,7 @@ # Supported sites
- **youtube:search**: YouTube search; "ytsearch:" prefix
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:stories**: YouTube channel stories; "ytstories:" prefix
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs

@@ -1567,6 +1567,292 @@ def test_parse_ism_formats(self):
|
||||
]
|
||||
},
|
||||
),
|
||||
(
|
||||
'ec-3_test',
|
||||
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
[{
|
||||
'format_id': 'audio_deu_1-224',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'isma',
|
||||
'tbr': 224,
|
||||
'asr': 48000,
|
||||
'vcodec': 'none',
|
||||
'acodec': 'EC-3',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'audio',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 0,
|
||||
'height': 0,
|
||||
'fourcc': 'EC-3',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
|
||||
'sampling_rate': 48000,
|
||||
'channels': 6,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'audio_ext': 'isma',
|
||||
'video_ext': 'none',
|
||||
'abr': 224,
|
||||
}, {
|
||||
'format_id': 'audio_deu-127',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'isma',
|
||||
'tbr': 127,
|
||||
'asr': 48000,
|
||||
'vcodec': 'none',
|
||||
'acodec': 'AACL',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'audio',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 0,
|
||||
'height': 0,
|
||||
'fourcc': 'AACL',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '1190',
|
||||
'sampling_rate': 48000,
|
||||
'channels': 2,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'audio_ext': 'isma',
|
||||
'video_ext': 'none',
|
||||
'abr': 127,
|
||||
}, {
|
||||
'format_id': 'video_deu-23',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'ismv',
|
||||
'width': 384,
|
||||
'height': 216,
|
||||
'tbr': 23,
|
||||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 384,
|
||||
'height': 216,
|
||||
'fourcc': 'AVC1',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8',
|
||||
'channels': 2,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 23,
|
||||
}, {
|
||||
'format_id': 'video_deu-403',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'ismv',
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
'tbr': 403,
|
||||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
'fourcc': 'AVC1',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2',
|
||||
'channels': 2,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 403,
|
||||
}, {
|
||||
'format_id': 'video_deu-680',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'ismv',
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
'tbr': 680,
|
||||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
'fourcc': 'AVC1',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
|
||||
'channels': 2,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 680,
|
||||
}, {
|
||||
'format_id': 'video_deu-1253',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'ext': 'ismv',
'width': 640,
'height': 360,
'tbr': 1253,
'vcodec': 'AVC1',
'acodec': 'none',
'protocol': 'ism',
'_download_params':
{
'stream_type': 'video',
'duration': 370000000,
'timescale': 10000000,
'width': 640,
'height': 360,
'fourcc': 'AVC1',
'language': 'deu',
'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
'channels': 2,
'bits_per_sample': 16,
'nal_unit_length_field': 4
},
'video_ext': 'ismv',
'audio_ext': 'none',
'vbr': 1253,
}, {
'format_id': 'video_deu-2121',
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'ext': 'ismv',
'width': 768,
'height': 432,
'tbr': 2121,
'vcodec': 'AVC1',
'acodec': 'none',
'protocol': 'ism',
'_download_params':
{
'stream_type': 'video',
'duration': 370000000,
'timescale': 10000000,
'width': 768,
'height': 432,
'fourcc': 'AVC1',
'language': 'deu',
'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80',
'channels': 2,
'bits_per_sample': 16,
'nal_unit_length_field': 4
},
'video_ext': 'ismv',
'audio_ext': 'none',
'vbr': 2121,
}, {
'format_id': 'video_deu-3275',
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'ext': 'ismv',
'width': 1280,
'height': 720,
'tbr': 3275,
'vcodec': 'AVC1',
'acodec': 'none',
'protocol': 'ism',
'_download_params':
{
'stream_type': 'video',
'duration': 370000000,
'timescale': 10000000,
'width': 1280,
'height': 720,
'fourcc': 'AVC1',
'language': 'deu',
'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80',
'channels': 2,
'bits_per_sample': 16,
'nal_unit_length_field': 4
},
'video_ext': 'ismv',
'audio_ext': 'none',
'vbr': 3275,
}, {
'format_id': 'video_deu-5300',
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'ext': 'ismv',
'width': 1920,
'height': 1080,
'tbr': 5300,
'vcodec': 'AVC1',
'acodec': 'none',
'protocol': 'ism',
'_download_params':
{
'stream_type': 'video',
'duration': 370000000,
'timescale': 10000000,
'width': 1920,
'height': 1080,
'fourcc': 'AVC1',
'language': 'deu',
'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
'channels': 2,
'bits_per_sample': 16,
'nal_unit_length_field': 4
},
'video_ext': 'ismv',
'audio_ext': 'none',
'vbr': 5300,
}, {
'format_id': 'video_deu-8079',
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
'ext': 'ismv',
'width': 1920,
'height': 1080,
'tbr': 8079,
'vcodec': 'AVC1',
'acodec': 'none',
'protocol': 'ism',
'_download_params':
{
'stream_type': 'video',
'duration': 370000000,
'timescale': 10000000,
'width': 1920,
'height': 1080,
'fourcc': 'AVC1',
'language': 'deu',
'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
'channels': 2,
'bits_per_sample': 16,
'nal_unit_length_field': 4
},
'video_ext': 'ismv',
'audio_ext': 'none',
'vbr': 8079,
}],
{},
),
]

for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
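The body of this loop is truncated in this excerpt. A minimal sketch of how such a case table is typically consumed — the parser entry point and the `expect_value` helper are assumptions based on the surrounding test suite, not part of this diff:

    # Hypothetical loop body (names assumed): feed each manifest through the
    # ISM parser and compare the result against the expected values above
    with open(f'./test/testdata/ism/{ism_file}.Manifest', encoding='utf-8') as f:
        formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
            compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
    expect_value(self, formats, expected_formats, None)
    expect_value(self, subtitles, expected_subtitles, None)
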
test/test_YoutubeDL.py
@@ -662,13 +662,17 @@ def test_add_extra_info(self):
'playlist_autonumber': 2,
'__last_playlist_index': 100,
'n_entries': 10,
'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}]
'formats': [
{'id': 'id 1', 'height': 1080, 'width': 1920},
{'id': 'id 2', 'height': 720},
{'id': 'id 3'}
]
}

def test_prepare_outtmpl_and_filename(self):
def test(tmpl, expected, *, info=None, **params):
params['outtmpl'] = tmpl
ydl = YoutubeDL(params)
ydl = FakeYDL(params)
ydl._num_downloads = 1
self.assertEqual(ydl.validate_outtmpl(tmpl), None)

@@ -729,6 +733,7 @@ def test(tmpl, expected, *, info=None, **params):
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
test('%(..)s', 'NA')
test('%(formats.{id)s', 'NA')

# Entire info_dict
def expect_same_infodict(out):
@@ -813,6 +818,12 @@ def expect_same_infodict(out):
test('%(formats.:2:-1)r', repr(FORMATS[:2:-1]))
test('%(formats.0.id.-1+id)f', '1235.000000')
test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
if 'height' in f else {'id': f['id']}
for f in FORMATS])
test('%(formats.:.{id,height.:2})j', (out, sanitize(out)))
test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
test('%(.{id,title})j', ('{"id": "1234"}', '{"id": "1234"}'))

# Alternates
test('%(title,id)s', '1234')
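The `{field,...}` selections tested above can also be driven through the public API; a minimal sketch (the sample info dict is made up for illustration):

    from yt_dlp import YoutubeDL

    info = {'id': '1234', 'formats': [{'id': 'id 1', 'height': 1080}, {'id': 'id 2'}]}
    # Collect id and height from every format and render the result as JSON
    print(YoutubeDL().evaluate_outtmpl('%(formats.:.{id,height})j', info))
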
test/test_compat.py
@@ -28,7 +28,8 @@ def test_compat_passthrough(self):
with self.assertWarns(DeprecationWarning):
compat.WINDOWS_VT_MODE

compat.asyncio.events # Must not raise error
# TODO: Test submodule
# compat.asyncio.events # Must not raise error

def test_compat_expanduser(self):
old_home = os.environ.get('HOME')

test/test_cookies.py
@@ -3,6 +3,7 @@

from yt_dlp import cookies
from yt_dlp.cookies import (
LenientSimpleCookie,
LinuxChromeCookieDecryptor,
MacChromeCookieDecryptor,
WindowsChromeCookieDecryptor,
@@ -137,3 +138,148 @@ def test_safari_cookie_parsing(self):
def test_pbkdf2_sha1(self):
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')


class TestLenientSimpleCookie(unittest.TestCase):
def _run_tests(self, *cases):
for message, raw_cookie, expected in cases:
cookie = LenientSimpleCookie(raw_cookie)

with self.subTest(message, expected=expected):
self.assertEqual(cookie.keys(), expected.keys(), message)

for key, expected_value in expected.items():
morsel = cookie[key]
if isinstance(expected_value, tuple):
expected_value, expected_attributes = expected_value
else:
expected_attributes = {}

attributes = {
key: value
for key, value in dict(morsel).items()
if value != ""
}
self.assertEqual(attributes, expected_attributes, message)

self.assertEqual(morsel.value, expected_value, message)

def test_parsing(self):
self._run_tests(
# Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
(
"Test basic cookie",
"chips=ahoy; vienna=finger",
{"chips": "ahoy", "vienna": "finger"},
),
(
"Test quoted cookie",
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
),
(
"Allow '=' in an unquoted value",
"keebler=E=mc2",
{"keebler": "E=mc2"},
),
(
"Allow cookies with ':' in their name",
"key:term=value:term",
{"key:term": "value:term"},
),
(
"Allow '[' and ']' in cookie values",
"a=b; c=[; d=r; f=h",
{"a": "b", "c": "[", "d": "r", "f": "h"},
),
(
"Test basic cookie attributes",
'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
{"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
),
(
"Test flag only cookie attributes",
'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
{"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
),
(
"Test flag only attribute with values",
"eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
{"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
),
(
"Test special case for 'expires' attribute, 4 digit year",
'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
{"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
),
(
"Test special case for 'expires' attribute, 2 digit year",
'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
{"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
),
(
"Test extra spaces in keys and values",
"eggs = scrambled ; secure ; path = bar ; foo=foo ",
{"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
),
(
"Test quoted attributes",
'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
{"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}
),
# Our own tests that CPython passes
(
"Allow ';' in quoted value",
'chips="a;hoy"; vienna=finger',
{"chips": "a;hoy", "vienna": "finger"},
),
(
"Keep only the last set value",
"a=c; a=b",
{"a": "b"},
),
)

def test_lenient_parsing(self):
self._run_tests(
(
"Ignore and try to skip invalid cookies",
'chips={"ahoy;": 1}; vienna="finger;"',
{"vienna": "finger;"},
),
(
"Ignore cookies without a name",
"a=b; unnamed; c=d",
{"a": "b", "c": "d"},
),
(
"Ignore '\"' cookie without name",
'a=b; "; c=d',
{"a": "b", "c": "d"},
),
(
"Skip all space separated values",
"x a=b c=d x; e=f",
{"a": "b", "c": "d", "e": "f"},
),
(
"Skip all space separated values",
'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
{"a": "b", "c": "d"},
),
(
"Expect quote mending",
'a=b; invalid="; c=d',
{"a": "b", "c": "d"},
),
(
"Reset morsel after invalid to not capture attributes",
"a=b; invalid; Version=1; c=d",
{"a": "b", "c": "d"},
),
(
"Continue after non-flag attribute without value",
"a=b; path; Version=1; c=d",
{"a": "b", "c": "d"},
),
)
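A minimal usage sketch of the behaviour pinned down above — the lenient parser skips a malformed fragment where the stdlib parser would typically give up:

    from yt_dlp.cookies import LenientSimpleCookie

    # The stray quote invalidates only its own fragment; 'a' and 'c' survive
    cookie = LenientSimpleCookie('a=b; invalid="; c=d')
    assert {name: morsel.value for name, morsel in cookie.items()} == {'a': 'b', 'c': 'd'}
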
test/test_execution.py
@@ -11,41 +11,46 @@
import contextlib
import subprocess

from yt_dlp.utils import encodeArgument
from yt_dlp.utils import Popen

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


try:
_DEV_NULL = subprocess.DEVNULL
except AttributeError:
_DEV_NULL = open(os.devnull, 'wb')
LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'


class TestExecution(unittest.TestCase):
def test_import(self):
subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)

def test_module_exec(self):
subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
stdout, stderr, returncode = Popen.run(
[*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(stderr, file=sys.stderr)
self.assertEqual(returncode, 0)
return stdout.strip(), stderr.strip()

def test_main_exec(self):
subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
self.run_yt_dlp()

def test_import(self):
self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))

def test_module_exec(self):
self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))

def test_cmdline_umlauts(self):
p = subprocess.Popen(
[sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
_, stderr = self.run_yt_dlp(opts=('ä', '--version'))
self.assertFalse(stderr)

def test_lazy_extractors(self):
try:
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
cwd=rootDir, stdout=subprocess.DEVNULL)
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))

_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
self.assertFalse(stderr)

subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
finally:
with contextlib.suppress(OSError):
os.remove('yt_dlp/extractor/lazy_extractors.py')
os.remove(LAZY_EXTRACTORS)
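The refactor above funnels every subprocess invocation through `yt_dlp.utils.Popen.run`, which returns output and exit status in a single call; a minimal sketch:

    import subprocess
    import sys

    from yt_dlp.utils import Popen

    # Popen.run returns (stdout, stderr, returncode), as used in run_yt_dlp above
    stdout, stderr, returncode = Popen.run(
        [sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    assert returncode == 0, stderr
    print(stdout.strip())
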

if __name__ == '__main__':

test/test_jsinterp.py
@@ -8,8 +8,8 @@
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import math
import re

from yt_dlp.compat import re
from yt_dlp.jsinterp import JS_Undefined, JSInterpreter


@@ -71,6 +71,9 @@ def test_operators(self):
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
self.assertEqual(jsi.call_function('f'), 0)

jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
self.assertFalse(jsi.call_function('f'))

def test_array_access(self):
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -129,6 +132,11 @@ def test_precedence(self):
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])

def test_builtins(self):
jsi = JSInterpreter('''
function x() { return NaN }
''')
self.assertTrue(math.isnan(jsi.call_function('x')))

jsi = JSInterpreter('''
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
''')
@@ -188,6 +196,30 @@ def test_try(self):
''')
self.assertEqual(jsi.call_function('x'), 10)

def test_catch(self):
jsi = JSInterpreter('''
function x() { try{throw 10} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 5)

def test_finally(self):
jsi = JSInterpreter('''
function x() { try{throw 10} finally {return 42} }
''')
self.assertEqual(jsi.call_function('x'), 42)
jsi = JSInterpreter('''
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
''')
self.assertEqual(jsi.call_function('x'), 42)

def test_nested_try(self):
jsi = JSInterpreter('''
function x() {try {
try{throw 10} finally {throw 42}
} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 5)

def test_for_loop_continue(self):
jsi = JSInterpreter('''
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
@@ -200,6 +232,14 @@ def test_for_loop_break(self):
''')
self.assertEqual(jsi.call_function('x'), 0)

def test_for_loop_try(self):
jsi = JSInterpreter('''
function x() {
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
return 42 }
''')
self.assertEqual(jsi.call_function('x'), 42)

def test_literal_list(self):
jsi = JSInterpreter('''
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
@@ -347,6 +387,27 @@ def test_regex(self):
''')
self.assertEqual(jsi.call_function('x').flags & re.I, re.I)

jsi = JSInterpreter(R'''
function x() { let a=/,][}",],()}(\[)/; return a; }
''')
self.assertEqual(jsi.call_function('x').pattern, r',][}",],()}(\[)')

def test_char_code_at(self):
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
self.assertEqual(jsi.call_function('x', 0), 116)
self.assertEqual(jsi.call_function('x', 1), 101)
self.assertEqual(jsi.call_function('x', 2), 115)
self.assertEqual(jsi.call_function('x', 3), 116)
self.assertEqual(jsi.call_function('x', 4), None)
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)

def test_bitwise_operators_overflow(self):
jsi = JSInterpreter('function x(){return -524999584 << 5}')
self.assertEqual(jsi.call_function('x'), 379882496)

jsi = JSInterpreter('function x(){return 1236566549 << 5}')
self.assertEqual(jsi.call_function('x'), 915423904)


if __name__ == '__main__':
unittest.main()
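The try/catch/finally cases above reduce to one rule: a `finally` that returns wins over both the thrown value and any `catch`; a minimal sketch:

    from yt_dlp.jsinterp import JSInterpreter

    # `finally {return 42}` overrides both the throw and the catch block
    jsi = JSInterpreter('function x() { try{throw 10} catch(e){return 5} finally {return 42} }')
    assert jsi.call_function('x') == 42
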

test/test_utils.py
@@ -109,6 +109,7 @@
strip_or_none,
subtitles_filename,
timeconvert,
traverse_obj,
unescapeHTML,
unified_strdate,
unified_timestamp,
@@ -566,6 +567,7 @@ def test_base_url(self):
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')

def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -1873,6 +1875,192 @@ def test_get_compatible_ext(self):
self.assertEqual(get_compatible_ext(
vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')

def test_traverse_obj(self):
_TEST_DATA = {
100: 100,
1.2: 1.2,
'str': 'str',
'None': None,
'...': ...,
'urls': [
{'index': 0, 'url': 'https://www.example.com/0'},
{'index': 1, 'url': 'https://www.example.com/1'},
],
'data': (
{'index': 2},
{'index': 3},
),
}

# Test base functionality
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
msg='allow tuple path')
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
msg='allow list path')
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
msg='allow iterable path')
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
msg='single items should be treated as a path')
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)

# Test Ellipsis behavior
self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
(item for item in _TEST_DATA.values() if item is not None),
msg='`...` should give all values except `None`')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
msg='`...` selection for dicts should select all values')
self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='nested `...` queries should work')
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
msg='`...` query result should be flattened')

# Test function as key
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
[_TEST_DATA['urls']],
msg='function as query key should perform a filter based on (key, value)')
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
msg='exceptions in the query function should be catched')

# Test alternative paths
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
msg='multiple `path_list` should be treated as alternative paths')
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
msg='alternatives should exit early')
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
msg='alternatives should return `default` if exhausted')

# Test branch and path nesting
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
msg='tuple as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
msg='list as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
msg='double nesting in path should be treated as paths')
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
msg='do not fail early on branching')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='tripple nesting in path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='ellipsis as branch path start gets flattened')

# Test dictionary as key
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
msg='dict key should result in a dict with the same keys')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
{0: 'https://www.example.com/0'},
msg='dict key should allow paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
{0: ['https://www.example.com/0']},
msg='tuple in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
{0: ['https://www.example.com/0']},
msg='double nesting in dict path should be treated as paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
msg='tripple nesting in dict path should be treated as branches')
self.assertEqual(traverse_obj({}, {0: 1}, default=...), {0: ...},
msg='do not remove `None` values when dict key')

# Testing default parameter behavior
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
msg='default value should be `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
msg='chained fails should result in default')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
msg='should not short cirquit on `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
msg='invalid dict key should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
msg='`None` is a deliberate sentinel and should become `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
msg='`IndexError` should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
msg='if branched but not successfull return `default`, not `[]`')

# Testing expected_type behavior
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), 'str',
msg='accept matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
msg='reject non matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), '0',
msg='transform type using type function')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
expected_type=lambda _: 1 / 0), None,
msg='wrap expected_type fuction in try_call')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str), ['str'],
msg='eliminate items that expected_type fails on')

# Test get_all behavior
_GET_ALL_DATA = {'key': [0, 1, 2]}
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
msg='if not `get_all`, return only first matching value')
self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
msg='do not overflatten if not `get_all`')

# Test casesense behavior
_CASESENSE_DATA = {
'KeY': 'value0',
0: {
'KeY': 'value1',
0: {'KeY': 'value2'},
},
}
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
msg='dict keys should be case sensitive unless `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
casesense=False), 'value0',
msg='allow non matching key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
casesense=False), ['value1'],
msg='allow non matching key case in branch if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
casesense=False), ['value2'],
msg='allow non matching key case in branch path if `casesense`')

# Test traverse_string behavior
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
msg='do not traverse into string if not `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
traverse_string=True), 's',
msg='traverse into string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
traverse_string=True), '.',
msg='traverse into converted data if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
traverse_string=True), list('str'),
msg='`...` branching into string should result in list')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
traverse_string=True), ['s', 'r'],
msg='branching into string should result in list')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
traverse_string=True), list('str'),
msg='function branching into string should result in list')

# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)


if __name__ == '__main__':
unittest.main()
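For reference, the central `traverse_obj` behaviours asserted above in one self-contained sketch:

    from yt_dlp.utils import traverse_obj

    data = {'urls': [{'index': 0, 'url': 'https://www.example.com/0'},
                     {'index': 1, 'url': 'https://www.example.com/1'}]}

    # `...` branches into every element and the results are flattened into a list
    assert traverse_obj(data, ('urls', ..., 'url')) == [
        'https://www.example.com/0', 'https://www.example.com/1']
    # A failed path falls back to `default` (None unless given)
    assert traverse_obj(data, ('urls', 5, 'url'), default='missing') == 'missing'
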
test/test_youtube_signature.py
@@ -110,6 +110,26 @@
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
),
(
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
),
(
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
),
(
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
),
(
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
),
(
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
),
]

test/testdata/ism/ec-3_test.Manifest (vendored, new file)
@@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8"?><!--Transformed by VSMT using XSL stylesheet for rule Identity--><!-- Created with Unified Streaming Platform (version=1.10.12-18737) --><SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="370000000"><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="127802" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /><c t="0" d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="7253333" /></StreamIndex><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu_1" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu_1={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="224000" CodecPrivateData="00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00" FourCCData="0700200F00" SamplingRate="48000" Channels="6" BitsPerSample="16" PacketSize="896" AudioTag="65534" FourCC="EC-3" /><c t="0" d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="8320000" /></StreamIndex><StreamIndex Type="video" QualityLevels="8" TimeScale="10000000" Language="deu" Name="video_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(video_deu={start time})?noStreamProfile=1" MaxWidth="1920" MaxHeight="1080" DisplayWidth="1920" DisplayHeight="1080"><QualityLevel Index="0" Bitrate="23909" CodecPrivateData="000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8" MaxWidth="384" MaxHeight="216" FourCC="AVC1" /><QualityLevel Index="1" Bitrate="403188" CodecPrivateData="00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2" MaxWidth="400" MaxHeight="224" FourCC="AVC1" /><QualityLevel Index="2" Bitrate="680365" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="3" Bitrate="1253465" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="4" Bitrate="2121558" CodecPrivateData="00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80" MaxWidth="768" MaxHeight="432" FourCC="AVC1" /><QualityLevel Index="5" Bitrate="3275545" CodecPrivateData="00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80" MaxWidth="1280" MaxHeight="720" FourCC="AVC1" /><QualityLevel Index="6" Bitrate="5300196" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><QualityLevel Index="7" Bitrate="8079312" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><c t="0" d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="10000000" /></StreamIndex></SmoothStreamingMedia>

yt_dlp/YoutubeDL.py
@@ -29,6 +29,7 @@
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
@@ -47,7 +48,7 @@
get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import detect_variant
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
DEFAULT_OUTTMPL,
IDENTITY,
@@ -89,6 +90,7 @@
args_to_str,
bug_reports_message,
date_from_str,
deprecation_warning,
determine_ext,
determine_protocol,
encode_compat_str,
@@ -106,6 +108,7 @@
get_domain,
int_or_none,
iri_to_uri,
is_path_like,
join_nonempty,
locked_file,
make_archive_id,
@@ -115,6 +118,7 @@
network_exceptions,
number_of_digits,
orderedSet,
orderedSet_from_options,
parse_filesize,
preferredencoding,
prepend_extension,
@@ -236,7 +240,7 @@ class YoutubeDL:
Default is 'only_download' for CLI, but False for API
skip_playlist_after_errors: Number of allowed failures until the rest of
the playlist is skipped
force_generic_extractor: Force downloader to use the generic extractor
allowed_extractors: List of regexes to match against extractor names that are allowed
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
and don't overwrite any file if False
@@ -248,8 +252,8 @@ class YoutubeDL:
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
logtostderr: Print everything to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
clean_infojson: Remove private fields from the infojson
@@ -290,9 +294,8 @@ class YoutubeDL:
downloaded.
Videos without view count information are always
downloaded. None for no limit.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
download_archive: A set, or the name of a file where all downloads are recorded.
Videos already present in the file are not downloaded again.
break_on_existing: Stop the download process after attempting to download a
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
@@ -301,8 +304,9 @@ class YoutubeDL:
should act on each input URL as opposed to for the entire queue
cookiefile: File name or text stream from where cookies should be read and dumped to
cookiesfrombrowser: A tuple containing the name of the browser, the profile
name/path from where cookies are loaded, and the name of the
keyring, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
name/path from where cookies are loaded, the name of the keyring,
and the container name, e.g. ('chrome', ) or
('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
support RFC 5746 secure renegotiation
nocheckcertificate: Do not verify SSL certificates
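The docstring change above adds a fourth tuple element for the cookie container; a minimal sketch of the new parameter shape:

    from yt_dlp import YoutubeDL

    # (browser, profile, keyring, container) -- trailing elements may be
    # omitted, e.g. ('chrome', ) is still valid
    ydl = YoutubeDL({'cookiesfrombrowser': ('firefox', 'default', None, 'Meta')})
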
@@ -476,6 +480,8 @@ class YoutubeDL:

The following options are deprecated and may be removed in the future:

force_generic_extractor: Force downloader to use the generic extractor
- Use allowed_extractors = ['generic', 'default']
playliststart: - Use playlist_items
Playlist item to start at.
playlistend: - Use playlist_items
@@ -591,7 +597,8 @@ def __init__(self, params=None, auto_init=True):
for type_, stream in self._out_files.items_ if type_ != 'console'
})

MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
# The code is left like this to be reused for future deprecations
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
current_version = sys.version_info[:2]
if current_version < MIN_RECOMMENDED:
msg = ('Support for Python version %d.%d has been deprecated. '
@@ -626,7 +633,7 @@ def check_deprecated(param, option, suggestion):
for msg in self.params.get('_warnings', []):
self.report_warning(msg)
for msg in self.params.get('_deprecation_warnings', []):
self.deprecation_warning(msg)
self.deprecated_feature(msg)

self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
if 'list-formats' in self.params['compat_opts']:
@@ -716,21 +723,23 @@ def check_deprecated(param, option, suggestion):

def preload_download_archive(fn):
"""Preload the archive, if any is specified"""
archive = set()
if fn is None:
return False
return archive
elif not is_path_like(fn):
return fn

self.write_debug(f'Loading archive file {fn!r}')
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
self.archive.add(line.strip())
archive.add(line.strip())
except OSError as ioe:
if ioe.errno != errno.ENOENT:
raise
return False
return True
return archive

self.archive = set()
preload_download_archive(self.params.get('download_archive'))
self.archive = preload_download_archive(self.params.get('download_archive'))
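`preload_download_archive` now returns the archive instead of mutating state, which also lets API users pass a pre-built set directly; a sketch (the '<extractor> <id>' entry format is an assumption based on `_make_archive_id`):

    from yt_dlp import YoutubeDL

    # A set is accepted in place of a file name; entries are assumed to use
    # the '<extractor> <id>' form produced by _make_archive_id
    ydl = YoutubeDL({'download_archive': {'youtube dQw4w9WgXcQ'}})
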

def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
@@ -756,13 +765,6 @@ def add_info_extractor(self, ie):
self._ies_instances[ie_key] = ie
ie.set_downloader(self)

def _get_info_extractor_class(self, ie_key):
ie = self._ies.get(ie_key)
if ie is None:
ie = get_info_extractor(ie_key)
self.add_info_extractor(ie)
return ie

def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
@@ -779,8 +781,19 @@ def add_default_info_extractors(self):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
"""
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
all_ies['end'] = UnsupportedURLIE()
try:
ie_names = orderedSet_from_options(
self.params.get('allowed_extractors', ['default']), {
'all': list(all_ies),
'default': [name for name, ie in all_ies.items() if ie._ENABLED],
}, use_regex=True)
except re.error as e:
raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
for name in ie_names:
self.add_info_extractor(all_ies[name])
self.write_debug(f'Loaded {len(ie_names)} extractors')
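With this change `add_default_info_extractors` honours the new `allowed_extractors` option (names or regexes, with `all` and `default` as shortcuts); a minimal sketch:

    from yt_dlp import YoutubeDL

    # The generic extractor plus everything enabled by default; an invalid
    # regex raises ValueError, as in the code above
    ydl = YoutubeDL({'allowed_extractors': ['generic', 'default']})
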

def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
@@ -826,12 +839,14 @@ def _write_string(self, message, out=None, only_once=False):
def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
if quiet is not None:
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
'Use "YoutubeDL.to_screen" instead')
if skip_eol is not False:
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
'Use "YoutubeDL.to_screen" instead')
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

def to_screen(self, message, skip_eol=False, quiet=None):
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
"""Print message to screen if not in quiet mode"""
if self.params.get('logger'):
self.params['logger'].debug(message)
@@ -840,7 +855,7 @@ def to_screen(self, message, skip_eol=False, quiet=None):
return
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
self._out_files.screen)
self._out_files.screen, only_once=only_once)

def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
@@ -964,11 +979,14 @@ def report_warning(self, message, only_once=False):
return
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

def deprecation_warning(self, message):
def deprecation_warning(self, message, *, stacklevel=0):
deprecation_warning(
message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

def deprecated_feature(self, message):
if self.params.get('logger') is not None:
self.params['logger'].warning(f'DeprecationWarning: {message}')
else:
self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
self.params['logger'].warning(f'Deprecated Feature: {message}')
self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

def report_error(self, message, *args, **kwargs):
'''
@@ -1028,7 +1046,7 @@ def _parse_outtmpl(self):

def get_output_path(self, dir_type='', filename=None):
paths = self.params.get('paths', {})
assert isinstance(paths, dict)
assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
path = os.path.join(
expand_path(paths.get('home', '').strip()),
expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
@@ -1111,8 +1129,12 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
'-': float.__sub__,
}
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
# where keys (except first) can be string, int, slice or "{field, ...}"
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
'inner': FIELD_INNER_RE,
'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
}
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
@@ -1126,11 +1148,20 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
(?:\|(?P<default>.*?))?
)$''')

def _traverse_infodict(k):
k = k.split('.')
if k[0] == '':
k.pop(0)
return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
def _traverse_infodict(fields):
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))]
for i in (0, -1):
if fields and not fields[i]:
fields.pop(i)

for i, f in enumerate(fields):
if not f.startswith('{'):
continue
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

def get_value(mdict):
# Object traversal
@@ -1216,9 +1247,11 @@ def create_key(outer_mobj):
delim = '\n' if '#' in flags else ', '
value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
value, fmt = json.dumps(
value, default=_dumpjson_default,
indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt
elif fmt[-1] == 'h': # html
value, fmt = escapeHTML(value), str_fmt
value, fmt = escapeHTML(str(value)), str_fmt
elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value])
value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
@@ -1390,18 +1423,19 @@ def add_extra_info(info_dict, extra_info):
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
process=True, force_generic_extractor=False):
"""
Return a list with a dictionary for each video extracted.
Extract and return the information dictionary of the URL

Arguments:
url -- URL to extract
@param url URL to extract

Keyword arguments:
download -- whether to download videos during extraction
ie_key -- extractor key hint
extra_info -- dictionary containing the extra values to add to each result
process -- whether to resolve all unresolved references (URLs, playlist items),
must be True for download to work.
force_generic_extractor -- force using the generic extractor
@param download Whether to download videos
@param process Whether to resolve all unresolved references (URLs, playlist items).
Must be True for download to work
@param ie_key Use only the extractor with this key

@param extra_info Dictionary containing the extra values to add to the info (For internal use only)
@force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
"""

if extra_info is None:
@@ -1411,11 +1445,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
ie_key = 'Generic'

if ie_key:
ies = {ie_key: self._get_info_extractor_class(ie_key)}
ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
else:
ies = self._ies

for ie_key, ie in ies.items():
for key, ie in ies.items():
if not ie.suitable(url):
continue

@@ -1424,14 +1458,16 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
'and will probably not work.')

temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
tb=False if extractors_restricted else None)

def _handle_extraction_exceptions(func):
@functools.wraps(func)
@@ -1651,8 +1687,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
elif result_type in ('playlist', 'multi_video'):
# Protect from infinite recursion due to recursively nested playlists
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
webpage_url = ie_result['webpage_url']
if webpage_url in self._playlist_urls:
webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url
if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
'[download] Skipping already downloaded playlist: %s'
% ie_result.get('title') or ie_result.get('id'))
@@ -1706,14 +1742,17 @@ def _playlist_infodict(ie_result, strict=False, **kwargs):
}
if strict:
return info
if ie_result.get('webpage_url'):
info.update({
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
})
return {
**info,
'playlist_index': 0,
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}

@@ -2387,6 +2426,8 @@ def _fill_common_fields(self, info_dict, is_video=True):
for key in live_keys:
if info_dict.get(key) is None:
info_dict[key] = (live_status == key)
if live_status == 'post_live':
info_dict['was_live'] = True

# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
@@ -2494,11 +2535,11 @@ def sanitize_numeric_fields(info):
info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
if info_dict['_has_drm'] and formats and all(
f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
self.report_warning(
'This video is DRM protected and only images are available for download. '
'Use --list-formats to see them')

if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
self.report_warning(
f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
'only images are available for download. Use --list-formats to see them'.capitalize())

get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
if not get_from_start:
@@ -2510,9 +2551,6 @@ def sanitize_numeric_fields(info):
'--live-from-start is passed, but there are no formats that can be downloaded from the start. '
'If you want to download from the current time, use --no-live-from-start'))

if not formats:
self.raise_no_formats(info_dict)

def is_wellformed(f):
url = f.get('url')
if not url:
@@ -2525,7 +2563,10 @@ def is_wellformed(f):
return True

# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
formats = list(filter(is_wellformed, formats or []))

if not formats:
self.raise_no_formats(info_dict)

formats_dict = {}

@@ -2661,31 +2702,29 @@ def is_wellformed(f):
# Process what we can, even without any available formats.
formats_to_download = [{}]

requested_ranges = self.params.get('download_ranges')
if requested_ranges:
requested_ranges = tuple(requested_ranges(info_dict, self))

requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
best_format, downloaded_formats = formats_to_download[-1], []
if download:
if best_format:
if best_format and requested_ranges:
def to_screen(*msg):
self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

to_screen(f'Downloading {len(formats_to_download)} format(s):',
(f['format_id'] for f in formats_to_download))
if requested_ranges:
if requested_ranges != ({}, ):
to_screen(f'Downloading {len(requested_ranges)} time ranges:',
(f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
(f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
max_downloads_reached = False

for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
new_info = self._copy_infodict(info_dict)
new_info.update(fmt)
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
end_time = offset + min(chapter.get('end_time', duration), duration)
if chapter or offset:
new_info.update({
'section_start': offset + chapter.get('start_time', 0),
'section_end': offset + min(chapter.get('end_time', duration), duration),
'section_end': end_time if end_time < offset + duration else None,
'section_title': chapter.get('title'),
'section_number': chapter.get('index'),
})
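`download_ranges` is a callable receiving `(info_dict, ydl)` and yielding the section dicts consumed above; `yt_dlp.utils.download_range_func` builds such a callable (a sketch; treat the helper's exact signature as an assumption if your version differs):

    from yt_dlp import YoutubeDL
    from yt_dlp.utils import download_range_func

    # Download only the 10s-20s section of each video; the callable yields
    # {'start_time': ..., 'end_time': ...} dicts for the loop above
    ydl = YoutubeDL({'download_ranges': download_range_func([], [(10, 20)])})
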
@@ -2727,42 +2766,26 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
if lang not in available_subs:
available_subs[lang] = cap_info

if (not self.params.get('writesubtitles') and not
self.params.get('writeautomaticsub') or not
available_subs):
if not available_subs or (
not self.params.get('writesubtitles')
and not self.params.get('writeautomaticsub')):
return None

all_sub_langs = tuple(available_subs.keys())
if self.params.get('allsubtitles', False):
requested_langs = all_sub_langs
elif self.params.get('subtitleslangs', False):
# A list is used so that the order of languages will be the same as
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
if lang_re == 'all':
if discard:
requested_langs = []
else:
requested_langs.extend(all_sub_langs)
continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
while lang in requested_langs:
requested_langs.remove(lang)
else:
requested_langs.extend(current_langs)
requested_langs = orderedSet(requested_langs)
try:
requested_langs = orderedSet_from_options(
self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
except re.error as e:
raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
elif normal_sub_langs:
requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
else:
requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
if requested_langs:
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
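The rewritten selection defers to `orderedSet_from_options`, keeping the regex and `-`-prefixed discard semantics of `subtitleslangs`; a minimal sketch:

    from yt_dlp import YoutubeDL

    # All English variants, but drop live-chat "subtitles"
    ydl = YoutubeDL({
        'writesubtitles': True,
        'subtitleslangs': ['en.*', '-live_chat'],
    })
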
@@ -2798,12 +2821,16 @@ def _forceprint(self, key, info_dict):
info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

def format_tmpl(tmpl):
mobj = re.match(r'\w+(=?)$', tmpl)
if mobj and mobj.group(1):
return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
elif mobj:
return f'%({tmpl})s'
return tmpl
mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
if not mobj:
return tmpl

fmt = '%({})s'
if tmpl.startswith('{'):
tmpl = f'.{tmpl}'
if tmpl.endswith('='):
tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
|
||||
|
||||
for tmpl in self.params['forceprint'].get(key, []):
|
||||
self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
|
||||
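
Some hand-derived expansions of the new format_tmpl helper above (outputs shown as comments):

format_tmpl('title')        # -> '%(title)s'
format_tmpl('id,title')     # -> '%(id)s\n%(title)s'
format_tmpl('id=')          # -> 'id = %(id)#j'
format_tmpl('{id,title}')   # -> '%(.{id,title})s'
format_tmpl('not a field')  # regex does not match -> returned unchanged
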
@@ -3270,6 +3297,7 @@ def wrapper(*args, **kwargs):
self.to_screen(f'[info] {e}')
if not self.params.get('break_per_url'):
raise
self._num_downloads = 0
else:
if self.params.get('dump_single_json', False):
self.post_extract(res)
@@ -3318,6 +3346,12 @@ def sanitize_info(info_dict, remove_private_keys=False):
return info_dict
info_dict.setdefault('epoch', int(time.time()))
info_dict.setdefault('_type', 'video')
info_dict.setdefault('_version', {
'version': __version__,
'current_git_head': current_git_head(),
'release_git_head': RELEASE_GIT_HEAD,
'repository': REPOSITORY,
})

if remove_private_keys:
reject = lambda k, v: v is None or k.startswith('__') or k in {
@@ -3438,8 +3472,7 @@ def _make_archive_id(self, info_dict):
return make_archive_id(extractor, video_id)

def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
if not self.archive:
return False

vid_ids = [self._make_archive_id(info_dict)]
@@ -3452,9 +3485,11 @@ def record_download_archive(self, info_dict):
return
vid_id = self._make_archive_id(info_dict)
assert vid_id

self.write_debug(f'Adding to archive: {vid_id}')
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
if is_path_like(fn):
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
self.archive.add(vid_id)

@staticmethod
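
A small sketch of the archive behaviour after this change: membership is checked against the in-memory set, and the file is appended to only when the archive option is an actual path. The class and names here are illustrative, not yt-dlp's:

class ArchiveSketch:
    def __init__(self, entries=()):
        self.archive = set(entries)  # may be pre-seeded without any file

    def in_download_archive(self, archive_id):
        return archive_id in self.archive if self.archive else False

    def record_download_archive(self, archive_id, fn=None):
        if isinstance(fn, str):  # stands in for is_path_like(fn)
            with open(fn, 'a', encoding='utf-8') as archive_file:
                archive_file.write(archive_id + '\n')
        self.archive.add(archive_id)
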
@@ -3607,7 +3642,7 @@ def render_thumbnails_table(self, info_dict):
return None
return render_table(
self._list_format_headers('ID', 'Width', 'Height', 'URL'),
[[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
[[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])

def render_subtitles_table(self, video_id, subtitles):
def _row(lang, formats):
@@ -3650,6 +3685,8 @@ def print_debug_header(self):
if not self.params.get('verbose'):
return

from . import _IN_CLI  # Must be delayed import

# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
@@ -3682,9 +3719,11 @@ def get_encoding(stream):
if VARIANT not in (None, 'pip'):
source += '*'
write_debug(join_nonempty(
'yt-dlp version', __version__,
f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
__version__,
f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
'' if source == 'unknown' else f'({source})',
'' if _IN_CLI else 'API',
delim=' '))
if not _LAZY_LOADER:
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
@@ -3698,18 +3737,8 @@ def get_encoding(stream):
if self.params['compat_opts']:
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

if source == 'source':
try:
stdout, _, _ = Popen.run(
['git', 'rev-parse', '--short', 'HEAD'],
text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if re.fullmatch('[0-9a-f]+', stdout.strip()):
write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
with contextlib.suppress(Exception):
sys.exc_clear()

if current_git_head():
write_debug(f'Git HEAD: {current_git_head()}')
write_debug(system_identifier())

exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)

@@ -1,4 +1,8 @@
f'You are using an unsupported version of Python. Only Python versions 3.6 and above are supported by yt-dlp'  # noqa: F541
try:
import contextvars  # noqa: F401
except Exception:
raise Exception(
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp')  # noqa: F541

__license__ = 'Public Domain'

@@ -21,6 +25,7 @@
from .postprocessor import (
FFmpegExtractAudioPP,
FFmpegMergerPP,
FFmpegPostProcessor,
FFmpegSubtitlesConvertorPP,
FFmpegThumbnailsConvertorPP,
FFmpegVideoConvertorPP,
@@ -58,6 +63,8 @@
)
from .YoutubeDL import YoutubeDL

_IN_CLI = False

def _exit(status=0, *args):
for msg in args:
@@ -319,14 +326,15 @@ def validate_outtmpl(tmpl, msg):

def parse_chapters(name, value):
chapters, ranges = [], []
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
for regex in value or []:
if regex.startswith('*'):
for range in regex[1:].split(','):
dur = tuple(map(parse_duration, range.strip().split('-')))
if len(dur) == 2 and all(t is not None for t in dur):
ranges.append(dur)
else:
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
if None in (dur or [None]):
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end')
ranges.append(dur)
continue
try:
chapters.append(re.compile(regex))
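
A sketch of the new time-range grammar accepted by parse_chapters; float() stands in for yt-dlp's parse_duration here, so values like '1:30' are not handled in this sketch:

import re

def parse_time_range(range_):
    parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else float(x)
    mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
    if not mobj:
        raise ValueError(f'invalid time range {range_!r}')
    # A missing start defaults to 0 and a missing end to infinity
    return parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')

parse_time_range('10-20')  # -> (10.0, 20.0)
parse_time_range('10-')    # -> (10.0, inf)
parse_time_range('-inf')   # -> (0.0, inf)
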
@@ -339,10 +347,16 @@ def parse_chapters(name, value):

# Cookies from browser
if opts.cookiesfrombrowser:
mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
container = None
mobj = re.fullmatch(r'''(?x)
(?P<name>[^+:]+)
(?:\s*\+\s*(?P<keyring>[^:]+))?
(?:\s*:\s*(?P<profile>.+?))?
(?:\s*::\s*(?P<container>.+))?
''', opts.cookiesfrombrowser)
if mobj is None:
raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container')
browser_name = browser_name.lower()
if browser_name not in SUPPORTED_BROWSERS:
raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
@@ -352,7 +366,7 @@ def parse_chapters(name, value):
if keyring not in SUPPORTED_KEYRINGS:
raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
opts.cookiesfrombrowser = (browser_name, profile, keyring)
opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
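
The accepted specification is now BROWSER[+KEYRING][:PROFILE][::CONTAINER]. A quick illustration of the regex above on a made-up value:

import re

mobj = re.fullmatch(r'''(?x)
    (?P<name>[^+:]+)
    (?:\s*\+\s*(?P<keyring>[^:]+))?
    (?:\s*:\s*(?P<profile>.+?))?
    (?:\s*::\s*(?P<container>.+))?
''', 'firefox+gnomekeyring:default-release::Work')
mobj.group('name', 'keyring', 'profile', 'container')
# -> ('firefox', 'gnomekeyring', 'default-release', 'Work')
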
# MetadataParser
def metadataparser_actions(f):
@@ -397,6 +411,9 @@ def metadataparser_actions(f):
if opts.download_archive is not None:
opts.download_archive = expand_path(opts.download_archive)

if opts.ffmpeg_location is not None:
opts.ffmpeg_location = expand_path(opts.ffmpeg_location)

if opts.user_agent is not None:
opts.headers.setdefault('User-Agent', opts.user_agent)
if opts.referer is not None:
@@ -472,7 +489,7 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u
val1=opts.sponskrub and opts.sponskrub_cut)

# Conflicts with --allow-unplayable-formats
report_conflict('--add-metadata', 'addmetadata')
report_conflict('--embed-metadata', 'addmetadata')
report_conflict('--embed-chapters', 'addchapters')
report_conflict('--embed-info-json', 'embed_infojson')
report_conflict('--embed-subs', 'embedsubtitles')
@@ -761,6 +778,7 @@ def parse_options(argv=None):
'windowsfilenames': opts.windowsfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
'allowed_extractors': opts.allowed_extractors or ['default'],
'ratelimit': opts.ratelimit,
'throttledratelimit': opts.throttledratelimit,
'overwrites': opts.overwrites,
@@ -902,6 +920,11 @@ def _real_main(argv=None):
if print_extractor_information(opts, all_urls):
return

# We may need ffmpeg_location without having access to the YoutubeDL instance
# See https://github.com/yt-dlp/yt-dlp/issues/2191
if opts.ffmpeg_location:
FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)

with YoutubeDL(ydl_opts) as ydl:
pre_process = opts.update_self or opts.rm_cachedir
actual_use = all_urls or opts.load_info_filename

@@ -14,4 +14,5 @@
import yt_dlp

if __name__ == '__main__':
yt_dlp._IN_CLI = True
yt_dlp.main()

@@ -6,7 +6,8 @@
import shutil
import traceback

from .utils import expand_path, write_json_file
from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__

class Cache:
@@ -45,12 +46,20 @@ def store(self, section, key, data, dtype='json'):
if ose.errno != errno.EEXIST:
raise
self._ydl.write_debug(f'Saving {section}.{key} to cache')
write_json_file(data, fn)
write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')

def load(self, section, key, dtype='json', default=None):
def _validate(self, data, min_ver):
version = traverse_obj(data, 'yt-dlp_version')
if not version:  # Backward compatibility
data, version = {'data': data}, '2022.08.19'
if not min_ver or version_tuple(version) >= version_tuple(min_ver):
return data['data']
self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')

def load(self, section, key, dtype='json', default=None, *, min_ver=None):
assert dtype in ('json',)

if not self.enabled:
@@ -61,8 +70,8 @@ def load(self, section, key, dtype='json', default=None):
try:
with open(cache_fn, encoding='utf-8') as cachef:
self._ydl.write_debug(f'Loading {section}.{key} from cache')
return json.load(cachef)
except ValueError:
return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError):
try:
file_size = os.path.getsize(cache_fn)
except OSError as oe:
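
A sketch of the version gate added to Cache.load; the inline version_tuple stands in for the utility of the same name, and the values are illustrative:

def validate_cache(data, min_ver):
    # New-style entries wrap the payload and record the writing version;
    # bare legacy entries are treated as written by 2022.08.19
    version_tuple = lambda v: tuple(map(int, v.split('.')))
    version = data.get('yt-dlp_version') if isinstance(data, dict) else None
    if not version:
        data, version = {'data': data}, '2022.08.19'
    if not min_ver or version_tuple(version) >= version_tuple(min_ver):
        return data['data']
    return None  # too old: discarded, caller falls back to the default

validate_cache({'yt-dlp_version': '2022.10.04', 'data': [1, 2]}, '2022.09.01')  # -> [1, 2]
validate_cache([1, 2], '2022.09.01')  # -> None (legacy entry below min_ver)
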
@@ -3,7 +3,6 @@
import warnings
import xml.etree.ElementTree as etree

from . import re
from ._deprecated import *  # noqa: F401, F403
from .compat_utils import passthrough_module

@@ -33,6 +32,7 @@ def compat_etree_fromstring(text):

if compat_os_name == 'nt':
def compat_shlex_quote(s):
import re
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
from shlex import quote as compat_shlex_quote  # noqa: F401

@@ -22,10 +22,14 @@
import xml.etree.ElementTree as etree
from subprocess import DEVNULL

from .compat_utils import passthrough_module  # isort: split
from .asyncio import run as compat_asyncio_run  # noqa: F401
from .re import Pattern as compat_Pattern  # noqa: F401
from .re import match as compat_Match  # noqa: F401
# isort: split
import asyncio  # noqa: F401
import re  # noqa: F401
from asyncio import run as compat_asyncio_run  # noqa: F401
from re import Pattern as compat_Pattern  # noqa: F401
from re import match as compat_Match  # noqa: F401

from .compat_utils import passthrough_module
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES  # noqa: F401
from ..dependencies import brotli as compat_brotli  # noqa: F401
from ..dependencies import websockets as compat_websockets  # noqa: F401

@@ -1,23 +0,0 @@
# flake8: noqa: F405
from asyncio import *  # noqa: F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 'asyncio')
del passthrough_module

try:
run  # >= 3.7
except NameError:
def run(coro):
try:
loop = get_event_loop()
except RuntimeError:
loop = new_event_loop()
set_event_loop(loop)
loop.run_until_complete(coro)

try:
all_tasks  # >= 3.7
except NameError:
all_tasks = Task.all_tasks
@@ -1,18 +0,0 @@
# flake8: noqa: F405
from re import *  # F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 're')
del passthrough_module

try:
Pattern  # >= 3.7
except NameError:
Pattern = type(compile(''))

try:
Match  # >= 3.7
except NameError:
Match = type(compile('').match(''))
@@ -1,8 +1,10 @@
import base64
import contextlib
import http.cookiejar
import http.cookies
import json
import os
import re
import shutil
import struct
import subprocess
@@ -24,7 +26,14 @@
sqlite3,
)
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
from .utils import (
Popen,
YoutubeDLCookieJar,
error_to_str,
expand_path,
is_path_like,
try_call,
)

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -85,11 +94,12 @@ def _create_progress_bar(logger):
def load_cookies(cookie_file, browser_specification, ydl):
cookie_jars = []
if browser_specification is not None:
browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))
browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
cookie_jars.append(
extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))

if cookie_file is not None:
is_filename = YoutubeDLCookieJar.is_path(cookie_file)
is_filename = is_path_like(cookie_file)
if is_filename:
cookie_file = expand_path(cookie_file)

@@ -101,9 +111,9 @@ def load_cookies(cookie_file, browser_specification, ydl):
return _merge_cookie_jars(cookie_jars)

def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
if browser_name == 'firefox':
return _extract_firefox_cookies(profile, logger)
return _extract_firefox_cookies(profile, container, logger)
elif browser_name == 'safari':
return _extract_safari_cookies(profile, logger)
elif browser_name in CHROMIUM_BASED_BROWSERS:
@@ -112,7 +122,7 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(),
raise ValueError(f'unknown browser: {browser_name}')

def _extract_firefox_cookies(profile, logger):
def _extract_firefox_cookies(profile, container, logger):
logger.info('Extracting cookies from firefox')
if not sqlite3:
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
@@ -131,11 +141,36 @@ def _extract_firefox_cookies(profile, logger):
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

container_id = None
if container not in (None, 'none'):
containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
raise FileNotFoundError(f'could not read containers.json in {search_root}')
with open(containers_path) as containers:
identities = json.load(containers).get('identities', [])
container_id = next((context.get('userContextId') for context in identities if container in (
context.get('name'),
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
)), None)
if not isinstance(container_id, int):
raise ValueError(f'could not find firefox container "{container}" in containers.json')

with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None
try:
cursor = _open_database_copy(cookie_database_path, tmpdir)
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
if isinstance(container_id, int):
logger.debug(
f'Only loading cookies from firefox container "{container}", ID {container_id}')
cursor.execute(
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
(f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
elif container == 'none':
logger.debug('Only loading cookies not belonging to any container')
cursor.execute(
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
else:
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
jar = YoutubeDLCookieJar()
with _create_progress_bar(logger) as progress_bar:
table = cursor.fetchall()
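
Two LIKE patterns are needed because userContextId may be the last originAttributes key or be followed by '&'. The attribute strings below are hypothetical:

container_id = 3
patterns = (f'%userContextId={container_id}', f'%userContextId={container_id}&%')
# 'firstPartyDomain=x&userContextId=3'   matches the first pattern
# 'userContextId=3&privateBrowsingId=1'  matches the second pattern
# 'userContextId=31'                     matches neither, so container 31 is not confused with 3
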
@@ -810,12 +845,15 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
def _get_mac_keyring_password(browser_keyring_name, logger):
logger.debug('using find-generic-password to obtain password from OSX keychain')
try:
stdout, _, _ = Popen.run(
stdout, _, returncode = Popen.run(
['security', 'find-generic-password',
'-w',  # write password to stdout
'-a', browser_keyring_name,  # match 'account'
'-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode:
logger.warning('find-generic-password failed')
return None
return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
@@ -948,11 +986,106 @@ def _is_path(value):
return os.path.sep in value

def _parse_browser_specification(browser_name, profile=None, keyring=None):
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
if browser_name not in SUPPORTED_BROWSERS:
raise ValueError(f'unsupported browser: "{browser_name}"')
if keyring not in (None, *SUPPORTED_KEYRINGS):
raise ValueError(f'unsupported keyring: "{keyring}"')
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring
if profile is not None and _is_path(expand_path(profile)):
profile = expand_path(profile)
return browser_name, profile, keyring, container

class LenientSimpleCookie(http.cookies.SimpleCookie):
"""More lenient version of http.cookies.SimpleCookie"""
# From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
_LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"

_RESERVED = {
"expires",
"path",
"comment",
"domain",
"max-age",
"secure",
"httponly",
"version",
"samesite",
}

_FLAGS = {"secure", "httponly"}

# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r"""
\s*  # Optional whitespace at start of cookie
(?P<key>  # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?  # Any word of at least one letter
)  # End of group 'key'
(  # Optional group: there may not be a value.
\s*=\s*  # Equal Sign
(  # Start of potential value
(?P<val>  # Start of group 'val'
"(?:[^\\"]|\\.)*"  # Any doublequoted string
|  # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
|  # or
[""" + _LEGAL_VALUE_CHARS + r"""]*  # Any word or empty string
)  # End of group 'val'
|  # or
(?P<bad>(?:\\;|[^;])*?)  # 'bad' group fallback for invalid values
)  # End of potential value
)?  # End of optional value group
\s*  # Any number of spaces.
(\s+|;|$)  # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE)

def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
if not isinstance(data, str):
return super().load(data)

morsel = None
index = 0
length = len(data)

while 0 <= index < length:
match = self._COOKIE_PATTERN.search(data, index)
if not match:
break

index = match.end(0)
if match.group("bad"):
morsel = None
continue

key, value = match.group("key", "val")

if key[0] == "$":
if morsel is not None:
morsel[key[1:]] = True
continue

lower_key = key.lower()
if lower_key in self._RESERVED:
if morsel is None:
continue

if value is None:
if lower_key not in self._FLAGS:
morsel = None
continue
value = True
else:
value, _ = self.value_decode(value)

morsel[key] = value

elif value is not None:
morsel = self.get(key, http.cookies.Morsel())
real_value, coded_value = self.value_decode(value)
morsel.set(key, real_value, coded_value)
self[key] = morsel

else:
morsel = None

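
A rough usage sketch: with the added 'bad' group, a malformed pair resets the current morsel instead of aborting the whole parse. The cookie string and outcome shown are illustrative:

jar = LenientSimpleCookie()
jar.load('bad="unterminated; good=1')
list(jar)  # the malformed pair should land in the 'bad' group and be skipped, keeping 'good'
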
@@ -24,6 +24,7 @@
encodeFilename,
format_bytes,
join_nonempty,
remove_start,
sanitize_open,
shell_quote,
timeconvert,
@@ -92,6 +93,7 @@ def _set_ydl(self, ydl):

for func in (
'deprecation_warning',
'deprecated_feature',
'report_error',
'report_file_already_downloaded',
'report_warning',
@@ -119,11 +121,11 @@ def format_seconds(seconds):
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
if not time.hours:
return '%02d:%02d' % time[1:-1]
return '%02d:%02d:%02d' % time[:-1]

format_eta = format_seconds
@classmethod
def format_eta(cls, seconds):
return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'

@staticmethod
def calc_percent(byte_counter, data_len):
@@ -331,6 +333,8 @@ def with_fields(*tups, default=''):
return tmpl
return default

_formats_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
@@ -338,7 +342,7 @@ def with_fields(*tups, default=''):
s.update({
'speed': speed,
'_speed_str': self.format_speed(speed).strip(),
'_total_bytes_str': format_bytes(s.get('total_bytes')),
'_total_bytes_str': _formats_bytes('total_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
'_percent_str': self.format_percent(100),
})
@@ -353,15 +357,15 @@ def with_fields(*tups, default=''):
return

s.update({
'_eta_str': self.format_eta(s.get('eta')),
'_eta_str': self.format_eta(s.get('eta')).strip(),
'_speed_str': self.format_speed(s.get('speed')),
'_percent_str': self.format_percent(try_call(
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)),
'_total_bytes_str': format_bytes(s.get('total_bytes')),
'_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')),
'_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')),
'_total_bytes_str': _formats_bytes('total_bytes'),
'_total_bytes_estimate_str': _formats_bytes('total_bytes_estimate'),
'_downloaded_bytes_str': _formats_bytes('downloaded_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
})

@@ -252,6 +252,10 @@ def supports_manifest(manifest):
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
return all(check_results)

@staticmethod
def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'

def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@@ -280,11 +284,9 @@ def _make_cmd(self, tmpfilename, info_dict):
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
if not os.path.isabs(dn):
dn = f'.{os.path.sep}{dn}'
cmd += ['--dir', dn + os.path.sep]
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
cmd += ['--out', f'.{os.path.sep}{os.path.basename(tmpfilename)}']
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']

if 'fragments' in info_dict:
@@ -293,11 +295,11 @@ def _make_cmd(self, tmpfilename, info_dict):
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode())
stream.close()
cmd += ['-i', url_list_file]
cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']]
return cmd
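
The helper makes every relative path explicitly dot-relative so aria2c does not misinterpret file names; hypothetical paths for illustration:

import os

def _aria2c_filename(fn):
    return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'

_aria2c_filename('clip.mp4')       # -> './clip.mp4' on POSIX
_aria2c_filename('/tmp/clip.mp4')  # -> '/tmp/clip.mp4' (already absolute)
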
@@ -515,16 +517,14 @@ class AVconvFD(FFmpegFD):
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}

_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}

def list_external_downloaders():
return sorted(_BY_NAME.keys())

def get_external_downloader(external_downloader):
""" Given the name of the executable, see whether we support the given
downloader . """
# Drop .exe extension on Windows
""" Given the name of the executable, see whether we support the given downloader """
bn = os.path.splitext(os.path.basename(external_downloader))[0]
return _BY_NAME.get(bn, _BY_EXE.get(bn))
return _BY_NAME.get(bn) or next((
klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
), None)

@@ -65,8 +65,8 @@ class FragmentFD(FileDownloader):
"""

def report_retry_fragment(self, err, frag_index, count, retries):
self.deprecation_warning(
'yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. Use yt_dlp.downloader.FileDownloader.report_retry instead')
self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. '
'Use yt_dlp.downloader.FileDownloader.report_retry instead')
return self.report_retry(err, count, retries, frag_index)

def report_skip_fragment(self, frag_index, err=None):

@@ -138,6 +138,8 @@ def write_piff_header(stream, params):

if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
if fourcc == 'EC-3':
sample_entry_box = box(b'ec-3', sample_entry_payload)
elif stream_type == 'video':
sample_entry_payload += u16.pack(0)  # pre defined
sample_entry_payload += u16.pack(0)  # reserved

@@ -1,3 +1,4 @@
import asyncio
import contextlib
import os
import signal
@@ -5,7 +6,6 @@

from .common import FileDownloader
from .external import FFmpegFD
from ..compat import asyncio
from ..dependencies import websockets

@@ -1,5 +1,29 @@
# flake8: noqa: F401

from .youtube import (  # Youtube is moved to the top to improve performance
YoutubeIE,
YoutubeClipIE,
YoutubeFavouritesIE,
YoutubeNotificationsIE,
YoutubeHistoryIE,
YoutubeTabIE,
YoutubeLivestreamEmbedIE,
YoutubePlaylistIE,
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeYtBeIE,
YoutubeYtUserIE,
YoutubeWatchLaterIE,
YoutubeShortsAudioPivotIE
)

from .abc import (
ABCIE,
ABCIViewIE,
@@ -61,7 +85,6 @@
AmericasTestKitchenSeasonIE,
)
from .angel import AngelIE
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE
from .allocine import AllocineIE
@@ -149,6 +172,7 @@
from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
from .beatport import BeatportIE
from .berufetv import BerufeTVIE
from .bet import BetIE
from .bfi import BFIPlayerIE
from .bfmtv import (
@@ -168,7 +192,9 @@
BilibiliAudioIE,
BilibiliAudioAlbumIE,
BiliBiliPlayerIE,
BilibiliChannelIE,
BilibiliSpaceVideoIE,
BilibiliSpaceAudioIE,
BilibiliSpacePlaylistIE,
BiliIntlIE,
BiliIntlSeriesIE,
BiliLiveIE,
@@ -194,6 +220,7 @@
from .bongacams import BongaCamsIE
from .bostonglobe import BostonGlobeIE
from .box import BoxIE
from .booyah import BooyahClipsIE
from .bpb import BpbIE
from .br import (
BRIE,
@@ -207,6 +234,7 @@
BrightcoveNewIE,
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@@ -306,6 +334,7 @@
CNNIE,
CNNBlogsIE,
CNNArticleIE,
CNNIndonesiaIE,
)
from .coub import CoubIE
from .comedycentral import (
@@ -384,7 +413,7 @@
DeezerAlbumIE,
)
from .democracynow import DemocracynowIE
from .detik import Detik20IE
from .detik import DetikEmbedIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
@@ -470,6 +499,7 @@
EpiconIE,
EpiconSeriesIE,
)
from .epoch import EpochIE
from .eporner import EpornerIE
from .eroprofile import (
EroProfileIE,
@@ -491,6 +521,7 @@
from .esri import EsriVideoIE
from .europa import EuropaIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
from .expotv import ExpoTVIE
from .expressen import ExpressenIE
@@ -623,6 +654,7 @@
)
from .googlesearch import GoogleSearchIE
from .gopro import GoProIE
from .goplay import GoPlayIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
@@ -689,6 +721,7 @@
IHeartRadioIE,
IHeartRadioPodcastIE,
)
from .iltalehti import IltalehtiIE
from .imdb import (
ImdbIE,
ImdbListIE
@@ -720,6 +753,11 @@
IqIE,
IqAlbumIE
)
from .islamchannel import (
IslamChannelIE,
IslamChannelSeriesIE,
)
from .israelnationalnews import IsraelNationalNewsIE
from .itprotv import (
ITProTVIE,
ITProTVCourseIE
@@ -909,6 +947,7 @@
MediasiteCatalogIE,
MediasiteNamedCatalogIE,
)
from .mediaworksnz import MediaWorksNZVODIE
from .medici import MediciIE
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
@@ -924,6 +963,7 @@
MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
)
from .microsoftembed import MicrosoftEmbedIE
from .mildom import (
MildomIE,
MildomVodIE,
@@ -1045,6 +1085,7 @@
NBCSportsIE,
NBCSportsStreamIE,
NBCSportsVPlayerIE,
NBCStationsIE,
)
from .ndr import (
NDRIE,
@@ -1079,6 +1120,7 @@
NewgroundsPlaylistIE,
NewgroundsUserIE,
)
from .newspicks import NewsPicksIE
from .newstube import NewstubeIE
from .newsy import NewsyIE
from .nextmedia import (
@@ -1192,6 +1234,7 @@
from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE
from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .onet import (
OnetIE,
OnetChannelIE,
@@ -1340,6 +1383,7 @@
PuhuTVIE,
PuhuTVSerieIE,
)
from .prankcast import PrankCastIE
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
@@ -1585,6 +1629,7 @@
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snotr import SnotrIE
from .sohu import SohuIE
from .sonyliv import (
@@ -1728,6 +1773,14 @@
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
from .tempo import TempoIE
from .tencent import (
IflixEpisodeIE,
IflixSeriesIE,
VQQSeriesIE,
VQQVideoIE,
WeTvEpisodeIE,
WeTvSeriesIE,
)
from .tennistv import TennisTVIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
@@ -1787,6 +1840,10 @@
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .triller import (
TrillerIE,
TrillerUserIE,
)
from .trilulilu import TriluliluIE
from .trovo import (
TrovoIE,
@@ -1820,6 +1877,10 @@
KatsomoIE,
MTVUutisetArticleIE,
)
from .tv24ua import (
TV24UAVideoIE,
TV24UAGenericPassthroughIE
)
from .tv2dk import (
TV2DKIE,
TV2DKBornholmPlayIE,
@@ -1922,6 +1983,7 @@
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
from .unscripted import UnscriptedNewsVideoIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -1979,7 +2041,6 @@
VidioLiveIE
)
from .vidlii import VidLiiIE
from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
@@ -2092,7 +2153,6 @@
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
from .wetv import WeTvEpisodeIE, WeTvSeriesIE
from .wikimedia import WikimediaIE
from .willow import WillowIE
from .wimtv import WimTVIE
@@ -2100,7 +2160,9 @@
from .wistia import (
WistiaIE,
WistiaPlaylistIE,
WistiaChannelIE,
)
from .wordpress import WordpressPlaylistEmbedIE
from .worldstarhiphop import WorldStarHipHopIE
from .wppilot import (
WPPilotIE,
@@ -2175,28 +2237,6 @@
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,
YoutubeClipIE,
YoutubeFavouritesIE,
YoutubeNotificationsIE,
YoutubeHistoryIE,
YoutubeTabIE,
YoutubeLivestreamEmbedIE,
YoutubePlaylistIE,
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeYtBeIE,
YoutubeYtUserIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
BBVTVIE,

@@ -84,7 +84,7 @@ def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
json_all = self._search_json(r'window.videoInfo\s*=\s*', webpage, 'videoInfo', video_id)
json_all = self._search_json(r'window.videoInfo\s*=', webpage, 'videoInfo', video_id)

title = json_all.get('title')
video_list = json_all.get('videoList') or []
@@ -164,7 +164,7 @@ def _real_extract(self, url):
video_id = f'{video_id}{format_field(ac_idx, template="__%s")}'

webpage = self._download_webpage(url, video_id)
json_bangumi_data = self._search_json(r'window.bangumiData\s*=\s*', webpage, 'bangumiData', video_id)
json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id)

if ac_idx:
video_info = json_bangumi_data['hlVideoInfo']
@@ -181,7 +181,7 @@ def _real_extract(self, url):
if v.get('id') == season_id), 1)

json_bangumi_list = self._search_json(
r'window\.bangumiList\s*=\s*', webpage, 'bangumiList', video_id, fatal=False)
r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
episode_number = video_internal_id and next((
idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)

@@ -1344,6 +1344,11 @@
'username_field': 'username',
'password_field': 'password',
},
'AlticeOne': {
'name': 'Optimum TV',
'username_field': 'j_username',
'password_field': 'j_password',
},
}

@@ -1705,7 +1710,7 @@ def extract_redirect_url(html, url=None, fatal=False):
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password
}
if mso_id == 'Cablevision':
if mso_id in ('Cablevision', 'AlticeOne'):
form_data['_eventId_proceed'] = ''
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
if mso_id != 'Rogers':

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import ExtractorError, int_or_none

class AmazonStoreIE(InfoExtractor):
@@ -9,7 +9,7 @@ class AmazonStoreIE(InfoExtractor):
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
'info_dict': {
'id': 'B098XNCHLD',
'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed',
'title': 'md5:dae240564cbb2642170c02f7f0d7e472',
},
'playlist_mincount': 1,
'playlist': [{
@@ -18,28 +18,44 @@ class AmazonStoreIE(InfoExtractor):
'ext': 'mp4',
'title': 'mcdodo usb c cable 100W 5a',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 34,
},
}]
}, {
'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
'info_dict': {
'id': 'B0863TXGM3',
'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff',
'title': 'md5:d1d3352428f8f015706c84b31e132169',
},
'playlist_mincount': 4,
}, {
'url': 'https://www.amazon.com/dp/B0845NXCXF/',
'info_dict': {
'id': 'B0845NXCXF',
'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171',
'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e',
},
'playlist-mincount': 1,
}, {
'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
'info_dict': {
'id': 'B08WX337PQ',
'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e',
},
'playlist_mincount': 1,
}]

def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
data_json = self._parse_json(self._html_search_regex(r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data'), id)

for retry in self.RetryManager():
webpage = self._download_webpage(url, id)
try:
data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
transform_source=lambda x: x.replace(R'\\u', R'\u'))
except ExtractorError as e:
retry.error = e

entries = [{
'id': video['marketPlaceID'],
'url': video['url'],
@@ -49,4 +65,4 @@ def _real_extract(self, url):
'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json['title'])
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
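
A rough sketch of the retry pattern used above; this generic manager stands in for yt-dlp's RetryManager, which additionally emits warnings and honours --extractor-retries:

class RetrySketch:
    def __init__(self, retries=3):
        self.retries, self.error = retries, None

    def __iter__(self):
        for _ in range(self.retries + 1):
            self.error = None
            yield self
            if self.error is None:
                return  # the attempt succeeded, stop retrying
        raise self.error  # attempts exhausted

for retry in RetrySketch():
    try:
        pass  # download and parse the page here
    except Exception as e:
        retry.error = e  # mark this attempt failed and trigger another pass
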
@@ -1,282 +0,0 @@
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
join_nonempty,
url_or_none,
urlencode_postdata,
urljoin,
)

class AnimeOnDemandIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand'
# German-speaking countries of Europe
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
_TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161',
'info_dict': {
'id': '161',
'title': 'Grimgar, Ashes and Illusions (OmU)',
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
},
'playlist_mincount': 4,
}, {
# Film wording is used instead of Episode, ger/jap, Dub/OmU
'url': 'https://www.anime-on-demand.de/anime/39',
'only_matching': True,
}, {
# Episodes without titles, jap, OmU
'url': 'https://www.anime-on-demand.de/anime/162',
'only_matching': True,
}, {
# ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/169',
'only_matching': True,
}, {
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
}, {
# Flash videos
'url': 'https://www.anime-on-demand.de/anime/12',
'only_matching': True,
}]

def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')

if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
self.raise_geo_restricted(
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)

login_form = self._form_hidden_inputs('new_user', login_page)

login_form.update({
'user[login]': username,
'user[password]': password,
})

post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post url', default=self._LOGIN_URL, group='url')

if not post_url.startswith('http'):
post_url = urljoin(self._LOGIN_URL, post_url)

response = self._download_webpage(
post_url, None, 'Logging in',
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})

if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
error = self._search_regex(
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
response, 'error', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')

def _real_extract(self, url):
anime_id = self._match_id(url)

webpage = self._download_webpage(url, anime_id)

if 'data-playlist=' not in webpage:
self._download_webpage(
self._APPLY_HTML5_URL, anime_id,
'Activating HTML5 beta', 'Unable to apply HTML5 beta')
webpage = self._download_webpage(url, anime_id)

csrf_token = self._html_search_meta(
'csrf-token', webpage, 'csrf token', fatal=True)

anime_title = self._html_search_regex(
r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
webpage, 'anime name')
anime_description = self._html_search_regex(
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
webpage, 'anime description', default=None)

def extract_info(html, video_id, num=None):
title, description = [None] * 2
formats = []

for input_ in re.findall(
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
attributes = extract_attributes(input_)
title = attributes.get('data-dialog-header')
playlist_urls = []
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
playlist_url = attributes.get(playlist_key)
if isinstance(playlist_url, compat_str) and re.match(
r'/?[\da-zA-Z]+', playlist_url):
playlist_urls.append(attributes[playlist_key])
if not playlist_urls:
continue

lang = attributes.get('data-lang')
lang_note = attributes.get('value')

for playlist_url in playlist_urls:
kind = self._search_regex(
r'videomaterialurl/\d+/([^/]+)/',
playlist_url, 'media kind', default=None)
format_id = join_nonempty(lang, kind) if lang or kind else str(num)
format_note = join_nonempty(kind, lang_note, delim=', ')
item_id_list = []
if format_id:
item_id_list.append(format_id)
item_id_list.append('videomaterial')
playlist = self._download_json(
urljoin(url, playlist_url), video_id,
'Downloading %s JSON' % ' '.join(item_id_list),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
}, fatal=False)
if not playlist:
continue
stream_url = url_or_none(playlist.get('streamurl'))
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
stream_url)
if rtmp:
formats.append({
'url': rtmp.group('url'),
'app': rtmp.group('app'),
'play_path': rtmp.group('playpath'),
'page_url': url,
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
'rtmp_real_time': True,
'format_id': 'rtmp',
'ext': 'flv',
})
continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
continue
playlist = playlist[start_video]
title = playlist.get('title')
if not title:
continue
description = playlist.get('description')
for source in playlist.get('sources', []):
file_ = source.get('file')
if not file_:
continue
ext = determine_ext(file_)
format_id = join_nonempty(
lang, kind,
'hls' if ext == 'm3u8' else None,
'dash' if source.get('type') == 'video/dash' or ext == 'mpd' else None)
if ext == 'm3u8':
file_formats = self._extract_m3u8_formats(
file_, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
elif source.get('type') == 'video/dash' or ext == 'mpd':
continue
file_formats = self._extract_mpd_formats(
file_, video_id, mpd_id=format_id, fatal=False)
else:
continue
for f in file_formats:
f.update({
'language': lang,
'format_note': format_note,
})
formats.extend(file_formats)

return {
'title': title,
'description': description,
'formats': formats,
}

def extract_entries(html, video_id, common_info, num=None):
info = extract_info(html, video_id, num)

if info['formats']:
self._sort_formats(info['formats'])
f = common_info.copy()
f.update(info)
yield f

# Extract teaser/trailer only when full episode is not available
if not info['formats']:
m = re.search(
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
html)
if m:
f = common_info.copy()
f.update({
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'),
'url': urljoin(url, m.group('href')),
})
yield f

def extract_episodes(html):
for num, episode_html in enumerate(re.findall(
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
episodebox_title = self._search_regex(
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
episode_html, 'episodebox title', default=None, group='title')
if not episodebox_title:
continue

episode_number = int(self._search_regex(
r'(?:Episode|Film)\s*(\d+)',
episodebox_title, 'episode number', default=num))
episode_title = self._search_regex(
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
episodebox_title, 'episode title', default=None)

video_id = 'episode-%d' % episode_number

common_info = {
'id': video_id,
'series': anime_title,
'episode': episode_title,
'episode_number': episode_number,
}

for e in extract_entries(episode_html, video_id, common_info):
yield e

def extract_film(html, video_id):
common_info = {
'id': anime_id,
'title': anime_title,
'description': anime_description,
}
for e in extract_entries(html, video_id, common_info):
yield e

def entries():
has_episodes = False
for e in extract_episodes(webpage):
has_episodes = True
yield e

if not has_episodes:
for e in extract_film(webpage, anime_id):
yield e

return self.playlist_result(
entries(), anime_id, anime_title, anime_description)
@@ -5,31 +5,70 @@
|
||||
import re
|
||||
import time
|
||||
|
||||
from .anvato_token_generator import NFLTokenGenerator
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
def md5_text(s):
|
||||
if not isinstance(s, compat_str):
|
||||
s = compat_str(s)
|
||||
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
return hashlib.md5(str(s).encode()).hexdigest()
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
|
||||
|
||||
_API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2'
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
'info_dict': {
|
||||
'id': '8032455',
|
||||
'ext': 'mp4',
|
||||
'title': '99-year-old woman learns to fly plane in Torrance, checks off bucket list dream',
|
||||
'description': 'md5:0a12bab8159445e78f52a297a35c6609',
|
||||
'upload_date': '20220928',
|
||||
'timestamp': 1664408881,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'LIN',
|
||||
'tags': ['video', 'news', '5live'],
|
||||
'duration': 155,
|
||||
'categories': ['News'],
|
||||
},
|
||||
}]
|
||||
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
@@ -202,86 +241,74 @@ class AnvatoIE(InfoExtractor):
        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
    }

    def _generate_nfl_token(self, anvack, mcp_id):
        reroute = self._download_json(
            'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
            headers={'X-Domain-Id': 100}, note='Fetching token info')
        token_type = reroute.get('token_type') or 'Bearer'
        auth_token = f'{token_type} {reroute["access_token"]}'
        response = self._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
    viewer {
        mediaToken(anvack: "%s", id: %s) {
            token
        }
    }
}''' % (anvack, mcp_id),
            }).encode(), headers={
                'Authorization': auth_token,
                'Content-Type': 'application/json',
            }, note='Fetching NFL API token')
        return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))

    _TOKEN_GENERATORS = {
        'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
        'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
    }

    _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'

    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'

    _TESTS = [{
        # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
        'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
        'info_dict': {
            'id': '4465496',
            'ext': 'mp4',
            'title': 'VIDEO: Humpback whale breaches right next to NH boat',
            'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
            'duration': 22,
            'timestamp': 1534855680,
            'upload_date': '20180821',
            'uploader': 'ANV',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
        'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
        'only_matching': True,
    }]

    def __init__(self, *args, **kwargs):
        super(AnvatoIE, self).__init__(*args, **kwargs)
        self.__server_time = None

    def _server_time(self, access_key, video_id):
        if self.__server_time is not None:
            return self.__server_time
        return int_or_none(traverse_obj(self._download_json(
            f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
            note='Fetching server time', fatal=False), 'server_time')) or int(time.time())

        self.__server_time = int(self._download_json(
            self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
            note='Fetching server time')['server_time'])

        return self.__server_time

    def _api_prefix(self, access_key):
        return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')

    def _get_video_json(self, access_key, video_id):
    def _get_video_json(self, access_key, video_id, extracted_token):
        # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
        video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
        video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}'
        server_time = self._server_time(access_key, video_id)
        input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
        input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'

        auth_secret = intlist_to_bytes(aes_encrypt(
            bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))

        video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
        query = {
            'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
            'rtyp': 'fp',
        }
        anvrid = md5_text(time.time() * 1000 * random.random())[:30]
        api = {
            'anvrid': anvrid,
            'anvts': server_time,
        }
        if self._TOKEN_GENERATORS.get(access_key) is not None:
            api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
        if extracted_token is not None:
            api['anvstk2'] = extracted_token
        elif self._TOKEN_GENERATORS.get(access_key) is not None:
            api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
        elif self._ANVACK_TABLE.get(access_key) is not None:
            api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
        else:
            api['anvstk'] = md5_text('%s|%s|%d|%s' % (
                access_key, anvrid, server_time,
                self._ANVACK_TABLE.get(access_key, self._API_KEY)))
            api['anvstk2'] = 'default'

        return self._download_json(
            video_data_url, video_id, transform_source=strip_jsonp,
            data=json.dumps({'api': api}).encode('utf-8'))
            video_data_url, video_id, transform_source=strip_jsonp, query=query,
            data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
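            # NB: the compact separators give the same byte-exact body as
            # JavaScript's JSON.stringify, which the Anvato web player presumably sends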

    def _get_anvato_videos(self, access_key, video_id):
        video_data = self._get_video_json(access_key, video_id)
    def _get_anvato_videos(self, access_key, video_id, token):
        video_data = self._get_video_json(access_key, video_id, token)

        formats = []
        for published_url in video_data['published_urls']:
            video_url = published_url['embed_url']
            video_url = published_url.get('embed_url')
            if not video_url:
                continue
            media_format = published_url.get('format')
            ext = determine_ext(video_url)

@@ -296,15 +323,27 @@ def _get_anvato_videos(self, access_key, video_id):
                'tbr': tbr or None,
            }

            if media_format == 'm3u8' and tbr is not None:
            vtt_subs, hls_subs = {}, {}
            if media_format == 'vtt':
                _, vtt_subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, m3u8_id='vtt', fatal=False)
                continue
            elif media_format == 'm3u8' and tbr is not None:
                a_format.update({
                    'format_id': join_nonempty('hls', tbr),
                    'ext': 'mp4',
                })
            elif media_format == 'm3u8-variant' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                # For some videos the initial m3u8 URL returns JSON instead
                manifest_json = self._download_json(
                    video_url, video_id, note='Downloading manifest JSON', errnote=False)
                if manifest_json:
                    video_url = manifest_json.get('master_m3u8')
                    if not video_url:
                        continue
                hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)
                formats.extend(hls_fmts)
                continue
            elif ext == 'mp3' or media_format == 'mp3':
                a_format['vcodec'] = 'none'

@@ -324,6 +363,7 @@ def _get_anvato_videos(self, access_key, video_id):
                    'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
                }
                subtitles.setdefault(caption['language'], []).append(a_caption)
        subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)

        return {
            'id': video_id,

@@ -349,7 +389,10 @@ def _extract_from_webpage(cls, url, webpage):
            access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
            if not (video_id or '').isdigit() or not access_key:
                continue
            yield cls.url_result(f'anvato:{access_key}:{video_id}', AnvatoIE, video_id)
            url = f'anvato:{access_key}:{video_id}'
            if anvplayer_data.get('token'):
                url = smuggle_url(url, {'token': anvplayer_data['token']})
            yield cls.url_result(url, AnvatoIE, video_id)
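            # the page's token is smuggled into the anvato: URL here and is
            # recovered by unsmuggle_url() in _real_extract, where it takes
            # priority over the token generators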

    def _extract_anvato_videos(self, webpage, video_id):
        anvplayer_data = self._parse_json(
@@ -357,7 +400,7 @@ def _extract_anvato_videos(self, webpage, video_id):
                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
            video_id)
        return self._get_anvato_videos(
            anvplayer_data['accessKey'], anvplayer_data['video'])
            anvplayer_data['accessKey'], anvplayer_data['video'], 'default')  # cbslocal token = 'default'

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
@@ -365,9 +408,7 @@ def _real_extract(self, url):
            'countries': smuggled_data.get('geo_countries'),
        })

        mobj = self._match_valid_url(url)
        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
        access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id')
        if access_key not in self._ANVACK_TABLE:
            access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
                access_key) or access_key
        return self._get_anvato_videos(access_key, video_id)
        access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key
        return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token'))

@@ -1,5 +0,0 @@
from .nfl import NFLTokenGenerator

__all__ = [
    'NFLTokenGenerator',
]
@@ -1,3 +0,0 @@
class TokenGenerator:
    def generate(self, anvack, mcp_id):
        raise NotImplementedError('This method must be implemented by subclasses')
@@ -1,28 +0,0 @@
import json

from .common import TokenGenerator


class NFLTokenGenerator(TokenGenerator):
    _AUTHORIZATION = None

    def generate(ie, anvack, mcp_id):
        if not NFLTokenGenerator._AUTHORIZATION:
            reroute = ie._download_json(
                'https://api.nfl.com/v1/reroute', mcp_id,
                data=b'grant_type=client_credentials',
                headers={'X-Domain-Id': 100})
            NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token'])
        return ie._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
    viewer {
        mediaToken(anvack: "%s", id: %s) {
            token
        }
    }
}''' % (anvack, mcp_id),
            }).encode(), headers={
                'Authorization': NFLTokenGenerator._AUTHORIZATION,
                'Content-Type': 'application/json',
            })['data']['viewer']['mediaToken']['token']
@@ -526,9 +526,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
        },
    ]
    _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
    _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x)
    _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x:
        (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
        {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}'''
        {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}
    )'''
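    # the scoped (?x: ... ) group keeps the verbose flag local to this pattern;
    # recent Python versions reject inline global flags that are not at the
    # very start of a regex, which matters once this pattern is embedded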

    _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com']  # thumbnails most likely archived on these servers
    _YT_ALL_THUMB_SERVERS = orderedSet(

@@ -95,24 +95,24 @@ class ArteTVIE(ArteTVBaseIE):

    # all obtained by exhaustive testing
    _COUNTRIES_MAP = {
        'DE_FR': {
        'DE_FR': (
            'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
            'PF', 'PM', 'RE', 'WF', 'YT',
        },
        ),
        # with both of the below 'BE' sometimes works, sometimes doesn't
        'EUR_DE_FR': {
        'EUR_DE_FR': (
            'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
            'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
            'YT',
        },
        'SAT': {
        ),
        'SAT': (
            'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
            'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
            'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
            'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
            'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
            'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
        },
        ),
    }
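    # plain tuples instead of sets: the `in` membership checks behave the
    # same, while the literals are immutable and keep a stable ordering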

    def _real_extract(self, url):
@@ -135,6 +135,7 @@ def _real_extract(self, url):
                'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)

        formats, subtitles = [], {}
        secondary_formats = []
        for stream in config['data']['attributes']['streams']:
            # official player contains code like `e.get("versions")[0].eStat.ml5`
            stream_version = stream['versions'][0]
@@ -152,22 +153,26 @@ def _real_extract(self, url):
                not m.group('sdh_sub'),  # and we prefer not the hard-of-hearing subtitles if there are subtitles
            )))

            short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
            if stream['protocol'].startswith('HLS'):
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
                for fmt in fmts:
                    fmt.update({
                        'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]',
                        'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
                        'language_preference': lang_pref,
                    })
                formats.extend(fmts)
                if any(map(short_label.startswith, ('cc', 'OGsub'))):
                    secondary_formats.extend(fmts)
                else:
                    formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

            elif stream['protocol'] in ('HTTPS', 'RTMP'):
                formats.append({
                    'format_id': f'{stream["protocol"]}-{stream_version_code}',
                    'url': stream['url'],
                    'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]',
                    'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
                    'language_preference': lang_pref,
                    # 'ext': 'mp4',  # XXX: may or may not be necessary, at least for HTTPS
                })
@@ -179,6 +184,8 @@ def _real_extract(self, url):
        # The JS also looks for chapters in config['data']['attributes']['chapters'],
        # but I am yet to find a video having those

        formats.extend(secondary_formats)
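        # hard-subtitled variants (short labels 'cc'/'OGsub') were collected
        # separately and are only appended once all clean streams are in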
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        metadata = config['data']['attributes']['metadata']

@@ -1,24 +1,33 @@
from .common import InfoExtractor
from ..utils import (
    clean_html,
    float_or_none,
)
from ..utils import clean_html, float_or_none, traverse_obj, unescapeHTML


class AudioBoomIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
        'md5': '7b00192e593ff227e6a315486979a42d',
        'md5': '4d68be11c9f9daf3dab0778ad1e010c3',
        'info_dict': {
            'id': '7398103',
            'ext': 'mp3',
            'title': 'Asim Chaudhry',
            'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
            'description': 'md5:0ed714ae0e81e5d9119cac2f618ad679',
            'duration': 4000.99,
            'uploader': 'Sue Perkins: An hour or so with...',
            'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
        }
    }, {  # Direct mp3-file link
        'url': 'https://audioboom.com/posts/8128496.mp3',
        'md5': 'e329edf304d450def95c7f86a9165ee1',
        'info_dict': {
            'id': '8128496',
            'ext': 'mp3',
            'title': 'TCRNo8 / DAILY 03 - In Control',
            'description': 'md5:44665f142db74858dfa21c5b34787948',
            'duration': 1689.7,
            'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
            'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
        }
    }, {
        'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
        'only_matching': True,

@@ -26,45 +35,23 @@ class AudioBoomIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://audioboom.com/posts/{video_id}', video_id)

        webpage = self._download_webpage(url, video_id)

        clip = None

        clip_store = self._parse_json(
            self._html_search_regex(
                r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
                webpage, 'clip store', default='{}', group='json'),
            video_id, fatal=False)
        if clip_store:
            clips = clip_store.get('clips')
            if clips and isinstance(clips, list) and isinstance(clips[0], dict):
                clip = clips[0]

        def from_clip(field):
            if clip:
                return clip.get(field)

        audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
            'audio', webpage, 'audio url')
        title = from_clip('title') or self._html_search_meta(
            ['og:title', 'og:audio:title', 'audio_title'], webpage)
        description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)

        duration = float_or_none(from_clip('duration') or self._html_search_meta(
            'weibo:audio:duration', webpage))

        uploader = from_clip('author') or self._html_search_meta(
            ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
        uploader_url = from_clip('author_url') or self._html_search_meta(
            'audioboo:channel', webpage, 'uploader url')
        clip_store = self._search_json(
            r'data-react-class="V5DetailPagePlayer"\s*data-react-props=["\']',
            webpage, 'clip store', video_id, fatal=False, transform_source=unescapeHTML)
        clip = traverse_obj(clip_store, ('clips', 0), expected_type=dict) or {}
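        # data-react-props holds HTML-escaped JSON, so unescapeHTML is applied
        # via transform_source before _search_json parses it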

        return {
            'id': video_id,
            'url': audio_url,
            'title': title,
            'description': description,
            'duration': duration,
            'uploader': uploader,
            'uploader_url': uploader_url,
            'url': clip.get('clipURLPriorToLoading') or self._og_search_property('audio', webpage, 'audio url'),
            'title': clip.get('title') or self._html_search_meta(['og:title', 'og:audio:title', 'audio_title'], webpage),
            'description': (clip.get('description') or clean_html(clip.get('formattedDescription'))
                            or self._og_search_description(webpage)),
            'duration': float_or_none(clip.get('duration') or self._html_search_meta('weibo:audio:duration', webpage)),
            'uploader': clip.get('author') or self._html_search_meta(
                ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader'),
            'uploader_url': clip.get('author_url') or self._html_search_regex(
                r'<div class="avatar flex-shrink-0">\s*<a href="(?P<uploader_url>http[^"]+)"',
                webpage, 'uploader url', fatal=False),
        }

@@ -5,23 +5,23 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    KNOWN_EXTENSIONS,
    ExtractorError,
    float_or_none,
    int_or_none,
    KNOWN_EXTENSIONS,
    parse_filesize,
    str_or_none,
    try_get,
    update_url_query,
    unified_strdate,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urljoin,
)


class BandcampIE(InfoExtractor):
    _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
    _VALID_URL = r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
    _TESTS = [{
        'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',

@@ -85,7 +85,7 @@ def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
            attr + ' data', group=2), video_id, fatal=fatal)

    def _real_extract(self, url):
        title = self._match_id(url)
        title, uploader = self._match_valid_url(url).group('id', 'uploader')
        webpage = self._download_webpage(url, title)
        tralbum = self._extract_data_attr(webpage, title)
        thumbnail = self._og_search_thumbnail(webpage)

@@ -197,6 +197,8 @@ def _real_extract(self, url):
            'title': title,
            'thumbnail': thumbnail,
            'uploader': artist,
            'uploader_id': uploader,
            'uploader_url': f'https://{uploader}.bandcamp.com',
            'timestamp': timestamp,
            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
            'duration': duration,

yt_dlp/extractor/berufetv.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from .common import InfoExtractor
from ..utils import float_or_none, mimetype2ext, traverse_obj


class BerufeTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
        'md5': '041b6432ec8e6838f84a5c30f31cc795',
        'info_dict': {
            'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
            'ext': 'mp4',
            'title': 'Volkswirtschaftslehre',
            'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
            'categories': ['Studien­beruf'],
            'tags': ['Studienfilm'],
            'duration': 602.440,
            'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        movie_metadata = self._download_json(
            'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
            video_id, 'Downloading JSON metadata',
            headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)

        meta = traverse_obj(
            movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
            get_all=False, default={})
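        # the lambda acts as a traverse_obj filter: it selects the metadata
        # entry whose miId matches video_id; get_all=False keeps the first hit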

        video = self._download_json(
            f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
            video_id, 'Downloading video JSON')

        formats, subtitles = [], {}
        for key, source in video['videoSources']['html'].items():
            if key == 'auto':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id)
                formats += fmts
                subtitles = subs
            else:
                formats.append({
                    'url': source[0]['source'],
                    'ext': mimetype2ext(source[0]['mimeType']),
                    'format_id': key,
                })

        for track in video.get('videoTracks') or []:
            if track.get('type') != 'SUBTITLES':
                continue
            subtitles.setdefault(track['language'], []).append({
                'url': track['source'],
                'name': track.get('label'),
                'ext': 'vtt'
            })

        return {
            'id': video_id,
            'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
            'description': meta.get('beschreibung'),
            'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
            'duration': float_or_none(video.get('duration'), scale=1000),
            'categories': [meta['kategorie']] if meta.get('kategorie') else None,
            'tags': meta.get('themengebiete'),
            'subtitles': subtitles,
            'formats': formats,
        }
@@ -2,8 +2,9 @@
import hashlib
import itertools
import functools
import re
import math
import re
import urllib

from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
@@ -13,23 +14,24 @@
)
from ..utils import (
    ExtractorError,
    InAdvancePagedList,
    OnDemandPagedList,
    filter_dict,
    int_or_none,
    float_or_none,
    int_or_none,
    mimetype2ext,
    parse_count,
    parse_iso8601,
    qualities,
    traverse_obj,
    parse_count,
    smuggle_url,
    srt_subtitles_timecode,
    str_or_none,
    strip_jsonp,
    traverse_obj,
    unified_timestamp,
    unsmuggle_url,
    urlencode_postdata,
    url_or_none,
    OnDemandPagedList
)


@@ -218,6 +220,9 @@ def _real_extract(self, url):

        durl = traverse_obj(video_info, ('dash', 'video'))
        audios = traverse_obj(video_info, ('dash', 'audio')) or []
        flac_audio = traverse_obj(video_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        entries = []

        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
@@ -502,39 +507,135 @@ def _real_extract(self, url):
            season_info.get('bangumi_title'), season_info.get('evaluate'))


class BilibiliChannelIE(InfoExtractor):
    _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        paged_list = InAdvancePagedList(
            lambda idx: get_entries(fetch_page(idx) if idx else first_page),
            metadata['page_count'], metadata['page_size'])

        return metadata, paged_list
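    # InAdvancePagedList knows the page count up front and fetches each page
    # lazily on first access; page 0 is reused from the metadata request above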


class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {},
        'playlist_mincount': 112,
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }]

    def _entries(self, list_id):
        count, max_count = 0, None
    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        for page_num in itertools.count(1):
            data = self._download_json(
                self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']
        def fetch_page(page_idx):
            try:
                response = self._download_json('https://api.bilibili.com/x/space/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}',
                                               query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
            except ExtractorError as e:
                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']
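            # HTTP 412 and code -401 are bilibili anti-crawler blocks; both are
            # surfaced as expected errors telling the user to supply cookies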

            max_count = max_count or traverse_obj(data, ('page', 'count'))
        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

            entries = traverse_obj(data, ('list', 'vlist'))
            if not entries:
                return
            for entry in entries:
                yield self.url_result(
                    'https://www.bilibili.com/video/%s' % entry['bvid'],
                    BiliBiliIE.ie_key(), entry['bvid'])
        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

            count += len(entries)
            if max_count and count >= max_count:
                return
        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)


class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/audio',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)


class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '《底特律 变人》'
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'title': traverse_obj(page_data, ('meta', 'name'))
            }

        def get_entries(page_data):
            for entry in page_data.get('archives', []):
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
                                      BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, metadata['title'])


class BilibiliCategoryIE(InfoExtractor):

@@ -620,14 +721,15 @@ def _search_results(self, query):
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'order': 'pubdate',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result') or []
                })['data'].get('result')
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))


@@ -905,7 +1007,7 @@ def _perform_login(self, username, password):


class BiliIntlIE(BiliIntlBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',

@@ -948,6 +1050,10 @@ class BiliIntlIE(BiliIntlBaseIE):
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    def _real_extract(self, url):

@@ -971,7 +1077,7 @@ def _real_extract(self, url):


class BiliIntlSeriesIE(BiliIntlBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,

@@ -989,6 +1095,9 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):

@@ -65,10 +65,12 @@ def _real_extract(self, url):
            error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
            if error == 'Video Unavailable':
                raise GeoRestrictedError(error)
            raise ExtractorError(error)
            raise ExtractorError(error, expected=True)
        formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        if not formats:
            raise self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(

@@ -8,13 +8,28 @@


class BongaCamsIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://de.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://cn.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://de.bongacams.net/claireashton',
        'info_dict': {
            'id': 'claireashton',
            'ext': 'mp4',
            'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'age_limit': 18,
            'uploader_id': 'ClaireAshton',
            'uploader': 'ClaireAshton',
            'like_count': int,
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):

yt_dlp/extractor/booyah.py (new file, 87 lines)
@@ -0,0 +1,87 @@
from .common import InfoExtractor
from ..utils import int_or_none, str_or_none, traverse_obj


class BooyahBaseIE(InfoExtractor):
    _BOOYAH_SESSION_KEY = None

    def _real_initialize(self):
        BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
            'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')

    def _get_comments(self, video_id):
        comment_json = self._download_json(
            f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
            headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}

        return [{
            'id': comment.get('comment_id'),
            'author': comment.get('from_nickname'),
            'author_id': comment.get('from_uid'),
            'author_thumbnail': comment.get('from_thumbnail'),
            'text': comment.get('content'),
            'timestamp': comment.get('create_time'),
            'like_count': comment.get('like_cnt'),
        } for comment in comment_json.get('comment_list') or ()]


class BooyahClipsIE(BooyahBaseIE):
    _VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://booyah.live/clips/13887261322952306617',
        'info_dict': {
            'id': '13887261322952306617',
            'ext': 'mp4',
            'view_count': int,
            'duration': 30,
            'channel_id': 90565760,
            'like_count': int,
            'title': 'Cayendo con estilo 😎',
            'uploader': '♡LɪꜱGΛMER',
            'comment_count': int,
            'uploader_id': '90565760',
            'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
            'upload_date': '20220617',
            'timestamp': 1655490556,
            'modified_timestamp': 1655490556,
            'modified_date': '20220617',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        json_data = self._download_json(
            f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
            headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})

        formats = []
        for video_data in json_data['playback']['endpoint_list']:
            formats.extend(({
                'url': video_data.get('stream_url'),
                'ext': 'mp4',
                'height': video_data.get('resolution'),
            }, {
                'url': video_data.get('download_url'),
                'ext': 'mp4',
                'format_note': 'Watermarked',
                'height': video_data.get('resolution'),
                'preference': -10,
            }))
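        # each endpoint contributes two formats: the clean stream plus a
        # watermarked download URL, demoted via preference=-10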
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': traverse_obj(json_data, ('playback', 'name')),
            'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
            'formats': formats,
            'view_count': traverse_obj(json_data, ('playback', 'views')),
            'like_count': traverse_obj(json_data, ('playback', 'likes')),
            'duration': traverse_obj(json_data, ('playback', 'duration')),
            'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
            'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
            'uploader': traverse_obj(json_data, ('user', 'nickname')),
            'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
            'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
            'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
            '__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
        }

yt_dlp/extractor/bundesliga.py (new file, 34 lines)
@@ -0,0 +1,34 @@
from .common import InfoExtractor
from .jwplatform import JWPlatformIE


class BundesligaIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bundesliga\.com/[a-z]{2}/bundesliga/videos(?:/[^?]+)?\?vid=(?P<id>[a-zA-Z0-9]{8})'
    _TESTS = [
        {
            'url': 'https://www.bundesliga.com/en/bundesliga/videos?vid=bhhHkKyN',
            'md5': '8fc3b25cd12440e3a8cdc51f1493849c',
            'info_dict': {
                'id': 'bhhHkKyN',
                'ext': 'mp4',
                'title': 'Watch: Alphonso Davies and Jeremie Frimpong head-to-head',
                'thumbnail': 'https://cdn.jwplayer.com/v2/media/bhhHkKyN/poster.jpg?width=720',
                'upload_date': '20220928',
                'duration': 146,
                'timestamp': 1664366511,
                'description': 'md5:803d4411bd134140c774021dd4b7598b'
            }
        },
        {
            'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G',
            'only_matching': True
        },
        {
            'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA',
            'only_matching': True
        }
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(f'jwplatform:{video_id}', JWPlatformIE, video_id)
@@ -1,6 +1,6 @@
from .common import InfoExtractor
from .turner import TurnerBaseIE
from ..utils import url_basename
from ..utils import merge_dicts, try_call, url_basename


class CNNIE(TurnerBaseIE):

@@ -141,3 +141,58 @@ def _real_extract(self, url):
        webpage = self._download_webpage(url, url_basename(url))
        cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
        return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())


class CNNIndonesiaIE(InfoExtractor):
    _VALID_URL = r'https?://www\.cnnindonesia\.com/[\w-]+/(?P<upload_date>\d{8})\d+-\d+-(?P<id>\d+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.cnnindonesia.com/ekonomi/20220909212635-89-845885/alasan-harga-bbm-di-indonesia-masih-disubsidi',
        'info_dict': {
            'id': '845885',
            'ext': 'mp4',
            'description': 'md5:e7954bfa6f1749bc9ef0c079a719c347',
            'upload_date': '20220909',
            'title': 'Alasan Harga BBM di Indonesia Masih Disubsidi',
            'timestamp': 1662859088,
            'duration': 120.0,
            'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/09/thumbnail-ekopedia-alasan-harga-bbm-disubsidi_169\.jpeg',
            'tags': ['ekopedia', 'subsidi bbm', 'subsidi', 'bbm', 'bbm subsidi', 'harga pertalite naik'],
            'age_limit': 0,
            'release_timestamp': 1662859088,
            'release_date': '20220911',
            'uploader': 'Asfahan Yahsyi',
        }
    }, {
        'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris',
        'info_dict': {
            'id': '846189',
            'ext': 'mp4',
            'upload_date': '20220911',
            'duration': 76.0,
            'timestamp': 1662869995,
            'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
            'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169\.jpeg',
            'title': 'VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
            'tags': ['raja charles', 'raja charles iii', 'ratu elizabeth', 'ratu elizabeth meninggal dunia', 'raja inggris', 'inggris'],
            'age_limit': 0,
            'release_date': '20220911',
            'uploader': 'REUTERS',
            'release_timestamp': 1662869995,
        }
    }]

    def _real_extract(self, url):
        upload_date, video_id, display_id = self._match_valid_url(url).group('upload_date', 'id', 'display_id')
        webpage = self._download_webpage(url, display_id)

        json_ld_list = list(self._yield_json_ld(webpage, display_id))
        json_ld_data = self._json_ld(json_ld_list, display_id)
        embed_url = next(
            json_ld.get('embedUrl') for json_ld in json_ld_list if json_ld.get('@type') == 'VideoObject')
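        # the first VideoObject entry in the page's JSON-LD carries the embed
        # URL; the url_transparent result below overlays the JSON-LD metadata
        # on top of whatever the embed extractor returns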

        return merge_dicts(json_ld_data, {
            '_type': 'url_transparent',
            'url': embed_url,
            'upload_date': upload_date,
            'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', '))
        })

@@ -5,12 +5,14 @@
import http.client
import http.cookiejar
import http.cookies
import inspect
import itertools
import json
import math
import netrc
import os
import random
import re
import sys
import time
import types
@@ -18,8 +20,9 @@
import urllib.request
import xml.etree.ElementTree

from ..compat import functools, re  # isort: split
from ..compat import functools  # isort: split
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..cookies import LenientSimpleCookie
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@@ -479,6 +482,9 @@ class InfoExtractor:
                         will be used by geo restriction bypass mechanism similarly
                         to _GEO_COUNTRIES.

    The _ENABLED attribute should be set to False for IEs that
    are disabled by default and must be explicitly enabled.

    The _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
    """
@@ -490,6 +496,7 @@ class InfoExtractor:
    _GEO_COUNTRIES = None
    _GEO_IP_BLOCKS = None
    _WORKING = True
    _ENABLED = True
    _NETRC_MACHINE = None
    IE_DESC = None
    SEARCH_KEY = None
@@ -504,7 +511,7 @@ def _login_hint(self, method=NO_DEFAULT, netrc=None):
            'password': f'Use {password_hint}',
            'cookies': (
                'Use --cookies-from-browser or --cookies for the authentication. '
                'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
                'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
        }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']

    def __init__(self, downloader=None):
@@ -1220,7 +1227,7 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
        return None

    def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
                     contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
                     contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
        """Searches string for the JSON object specified by start_pattern"""
        # NB: end_pattern is only used to reduce the size of the initial match
        if default is NO_DEFAULT:
@@ -1229,7 +1236,7 @@ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
            fatal, has_default = False, True

        json_string = self._search_regex(
            rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
            rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
            string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
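        # the braces now live inside contains_pattern itself, so callers can
        # override it to match arrays or other JSON values, not only objects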
        if not json_string:
            return default
@@ -1529,10 +1536,10 @@ def extract_chapter_information(e):
            info['chapters'] = chapters

        def extract_video_object(e):
            assert is_type(e, 'VideoObject')
            author = e.get('author')
            info.update({
                'url': url_or_none(e.get('contentUrl')),
                'ext': mimetype2ext(e.get('encodingFormat')),
                'title': unescapeHTML(e.get('name')),
                'description': unescapeHTML(e.get('description')),
                'thumbnails': [{'url': unescapeHTML(url)}
@@ -1545,12 +1552,19 @@ def extract_video_object(e):
                # however some websites are using 'Text' type instead.
                # 1. https://schema.org/VideoObject
                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
                'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
                'filesize': int_or_none(float_or_none(e.get('contentSize'))),
                'tbr': int_or_none(e.get('bitrate')),
                'width': int_or_none(e.get('width')),
                'height': int_or_none(e.get('height')),
                'view_count': int_or_none(e.get('interactionCount')),
                'tags': try_call(lambda: e.get('keywords').split(',')),
            })
            if is_type(e, 'AudioObject'):
                info.update({
                    'vcodec': 'none',
                    'abr': int_or_none(e.get('bitrate')),
                })
            extract_interaction_statistic(e)
            extract_chapter_information(e)

@@ -1601,7 +1615,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
                    extract_video_object(e['video'][0])
                elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
                    extract_video_object(e['subjectOf'][0])
                elif is_type(e, 'VideoObject'):
                elif is_type(e, 'VideoObject', 'AudioObject'):
                    extract_video_object(e)
                if expected_type is None:
                    continue
@@ -1688,7 +1702,7 @@ class FormatSort:
                     'order_free': ('webm', 'mp4', 'flv', '', 'none')},
            'aext': {'type': 'ordered', 'field': 'audio_ext',
                     'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
                     'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
                     'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')},
            'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
            'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                           'field': ('vcodec', 'acodec'),
@@ -1761,9 +1775,8 @@ def _get_field_setting(self, field, key):
            if field not in self.settings:
                if key in ('forced', 'priority'):
                    return False
                self.ydl.deprecation_warning(
                    f'Using arbitrary fields ({field}) for format sorting is deprecated '
                    'and may be removed in a future version')
                self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
                                            'deprecated and may be removed in a future version')
                self.settings[field] = {}
            propObj = self.settings[field]
            if key not in propObj:
@@ -1848,9 +1861,8 @@ def add_item(field, reverse, closest, limit_text):
            if self._get_field_setting(field, 'type') == 'alias':
                alias, field = field, self._get_field_setting(field, 'field')
                if self._get_field_setting(alias, 'deprecated'):
                    self.ydl.deprecation_warning(
                        f'Format sorting alias {alias} is deprecated '
                        f'and may be removed in a future version. Please use {field} instead')
                    self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                                f'be removed in a future version. Please use {field} instead')
            reverse = match.group('reverse') is not None
            closest = match.group('separator') == '~'
            limit_text = match.group('limit')
@@ -2911,6 +2923,8 @@ def extract_Initialization(source):

        def prepare_template(template_name, identifiers):
            tmpl = representation_ms_info[template_name]
            if representation_id is not None:
                tmpl = tmpl.replace('$RepresentationID$', representation_id)
            # First of, % characters outside $...$ templates
            # must be escaped by doubling for proper processing
            # by % operator string formatting used further (see
@@ -2925,8 +2939,6 @@ def prepare_template(template_name, identifiers):
                t += c
            # Next, $...$ templates are translated to their
            # %(...) counterparts to be used with % operator
            if representation_id is not None:
                t = t.replace('$RepresentationID$', representation_id)
            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
            t.replace('$$', '$')
@@ -3112,9 +3124,10 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
            stream_name = stream.get('Name')
            stream_language = stream.get('Language', 'und')
            for track in stream.findall('QualityLevel'):
                fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
                KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
                fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
                # TODO: add support for WVC1 and WMAP
                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
                    self.report_warning('%s is not a supported codec' % fourcc)
                    continue
                tbr = int(track.attrib['Bitrate']) // 1000
@@ -3257,7 +3270,7 @@ def _media_formats(src, cur_media_type, type_info=None):
                'subtitles': {},
            }
            media_attributes = extract_attributes(media_tag)
            src = strip_or_none(media_attributes.get('src'))
            src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
            if src:
                f = parse_content_type(media_attributes.get('type'))
                _, formats = _media_formats(src, media_type, f)
@@ -3268,7 +3281,7 @@ def _media_formats(src, cur_media_type, type_info=None):
                s_attr = extract_attributes(source_tag)
                # data-video-src and data-src are non standard but seen
                # several times in the wild
                src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
                src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
                if not src:
                    continue
                f = parse_content_type(s_attr.get('type'))
@@ -3574,7 +3587,8 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                    'url': source_url,
                    'width': int_or_none(source.get('width')),
                    'height': height,
                    'tbr': int_or_none(source.get('bitrate')),
                    'tbr': int_or_none(source.get('bitrate'), scale=1000),
                    'filesize': int_or_none(source.get('filesize')),
                    'ext': ext,
                }
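                # jwplayer sources report bitrate in bits/s; scale=1000
                # converts it to the kbit/s that the tbr field expects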
                if source_url.startswith('rtmp'):
@@ -3628,7 +3642,7 @@ def _set_cookie(self, domain, name, value, expire_time=None, port=None,

    def _get_cookies(self, url):
        """ Return a http.cookies.SimpleCookie with the cookies for the url """
        return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
        return LenientSimpleCookie(self._downloader._calc_cookies(url))

    def _apply_first_set_cookie_header(self, url_handle, cookie):
        """
@@ -3852,8 +3866,10 @@ def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_l
            return True

    def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
        RetryManager.report_retry(err, _count or int(fatal), _retries, info=self.to_screen, warn=self.report_warning,
                                  sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
        RetryManager.report_retry(
            err, _count or int(fatal), _retries,
            info=self.to_screen, warn=self.report_warning, error=None if fatal else self.report_warning,
            sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))

    def RetryManager(self, **kwargs):
        return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
@@ -3871,7 +3887,7 @@ def extract_from_webpage(cls, ydl, url, webpage):
    def _extract_from_webpage(cls, url, webpage):
        for embed_url in orderedSet(
                cls._extract_embed_urls(url, webpage) or [], lazy=True):
            yield cls.url_result(embed_url, cls)
            yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
@@ -3897,6 +3913,18 @@ def _extract_url(cls, webpage):  # TODO: Remove
        """Only for compatibility with some older extractors"""
        return next(iter(cls._extract_embed_urls(None, webpage) or []), None)

    @classmethod
    def __init_subclass__(cls, *, plugin_name=None, **kwargs):
        if plugin_name:
            mro = inspect.getmro(cls)
            super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
            cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key
            while getattr(super_class, '__wrapped__', None):
                super_class = super_class.__wrapped__
            setattr(sys.modules[super_class.__module__], super_class.__name__, cls)

        return super().__init_subclass__(**kwargs)
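    # a plugin declared as `class MyIE(ParentIE, plugin_name='example')` keeps
    # the parent's ie_key, gains an IE_NAME suffixed with '+example', and
    # rebinds the parent's module attribute so lookups resolve to the plugin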


class SearchInfoExtractor(InfoExtractor):
    """
@@ -3940,3 +3968,12 @@ def _search_results(self, query):
    @classproperty
    def SEARCH_KEY(cls):
        return cls._SEARCH_KEY


class UnsupportedURLIE(InfoExtractor):
    _VALID_URL = '.*'
    _ENABLED = False
    IE_DESC = False

    def _real_extract(self, url):
        raise UnsupportedError(url)

@@ -720,15 +720,20 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):

    def _get_params(self, lang):
        if not CrunchyrollBetaBaseIE.params:
            if self._get_cookies(f'https://beta.crunchyroll.com/{lang}').get('etp_rt'):
                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
            else:
                grant_type, key = 'client_id', 'anonClientId'
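            # an etp_rt cookie indicates a logged-in session, so the token
            # exchange switches between cookie-based and anonymous credentials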

            initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(
                f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
            api_domain = app_config['cxApiParams']['apiDomain']
            basic_token = str(base64.b64encode(('%s:' % app_config['cxApiParams']['accountAuthClientId']).encode('ascii')), 'ascii')

            auth_response = self._download_json(
                f'{api_domain}/auth/v1/token', None, note='Authenticating with cookie',
                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={
                    'Authorization': 'Basic ' + basic_token
                }, data='grant_type=etp_rt_cookie'.encode('ascii'))
                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
                }, data=f'grant_type={grant_type}'.encode('ascii'))
            policy_response = self._download_json(
                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
                headers={
@@ -747,21 +752,6 @@ def _get_params(self, lang):
            CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBetaBaseIE.params

    def _redirect_from_beta(self, url, lang, internal_id, display_id, is_episode, iekey):
        initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(url, display_id), display_id)
        content_data = initial_state['content']['byId'][internal_id]
        if is_episode:
            video_id = content_data['external_id'].split('.')[1]
            series_id = content_data['episode_metadata']['series_slug_title']
        else:
            series_id = content_data['slug_title']
        series_id = re.sub(r'-{2,}', '-', series_id)
        url = f'https://www.crunchyroll.com/{lang}{series_id}'
        if is_episode:
            url = url + f'/{display_id}-{video_id}'
        self.to_screen(f'{display_id}: Not logged in. Redirecting to non-beta site - {url}')
        return self.url_result(url, iekey, display_id)


class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
    IE_NAME = 'crunchyroll:beta'
@@ -789,7 +779,28 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
            'episode_number': 73,
            'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': 'm3u8'},
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        'url': 'https://beta.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': True},
        'skip': 'Video is Premium only',
    }, {
        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
@@ -800,10 +811,6 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):

    def _real_extract(self, url):
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')

        if not self._get_cookies(url).get('etp_rt'):
            return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key())
|
||||
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
episode_response = self._download_json(
|
||||
@@ -821,30 +828,48 @@ def _real_extract(self, url):
hardsub_preference = qualities(requested_hardsubs[::-1])
requested_formats = self._configuration_arg('format') or ['adaptive_hls']

formats = []
available_formats = {}
for stream_type, streams in get_streams('streams'):
if stream_type not in requested_formats:
continue
for stream in streams.values():
hardsub_lang = stream.get('hardsub_locale') or ''
if hardsub_lang.lower() not in requested_hardsubs:
continue
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
if not stream.get('url'):
continue
if stream_type.endswith('hls'):
hardsub_lang = stream.get('hardsub_locale') or ''
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])

if '' in available_formats and 'all' not in requested_hardsubs:
full_format_langs = set(requested_hardsubs)
self.to_screen(
'To get all formats of a hardsub language, use '
'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info',
only_once=True)
else:
full_format_langs = set(map(str.lower, available_formats))

formats = []
for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
if stream_type.endswith('hls'):
if hardsub_lang.lower() in full_format_langs:
adaptive_formats = self._extract_m3u8_formats(
stream['url'], display_id, 'mp4', m3u8_id=format_id,
stream_url, display_id, 'mp4', m3u8_id=format_id,
fatal=False, note=f'Downloading {format_id} HLS manifest')
elif stream_type.endswith('dash'):
adaptive_formats = self._extract_mpd_formats(
stream['url'], display_id, mpd_id=format_id,
fatal=False, note=f'Downloading {format_id} MPD manifest')
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale')
f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats)
else:
adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
elif stream_type.endswith('dash'):
adaptive_formats = self._extract_mpd_formats(
stream_url, display_id, mpd_id=format_id,
fatal=False, note=f'Downloading {format_id} MPD manifest')
else:
self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
continue
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale')
f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats)
self._sort_formats(formats)

return {
@@ -897,10 +922,6 @@ class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):

def _real_extract(self, url):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')

if not self._get_cookies(url).get('etp_rt'):
return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key())

api_domain, bucket, params = self._get_params(lang)

series_response = self._download_json(

@@ -1,11 +1,10 @@
from .common import InfoExtractor

from .common import InfoExtractor
from ..utils import (
ExtractorError,
smuggle_url,
str_or_none,
traverse_obj,
urlencode_postdata
urlencode_postdata,
)


@@ -1,122 +1,162 @@
from .common import InfoExtractor
from ..utils import merge_dicts, str_or_none
from ..utils import int_or_none, merge_dicts, try_call, url_basename


class Detik20IE(InfoExtractor):
IE_NAME = '20.detik.com'
_VALID_URL = r'https?://20\.detik\.com/((?!program)[\w-]+)/[\d-]+/(?P<id>[\w-]+)'
_TESTS = [{
# detikflash
'url': 'https://20.detik.com/detikflash/20220705-220705098/zulhas-klaim-sukses-turunkan-harga-migor-jawa-bali',
class DetikEmbedIE(InfoExtractor):
_VALID_URL = False
_WEBPAGE_TESTS = [{
# cnn embed
'url': 'https://www.cnnindonesia.com/embed/video/846189',
'info_dict': {
'id': '220705098',
'id': '846189',
'ext': 'mp4',
'duration': 157,
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/bfe0384db04f4bbb9dd5efc869c5d4b1-20220705164334-0s.jpg?w=650&q=80',
'description': 'md5:ac18dcee5b107abbec1ed46e0bf400e3',
'title': 'Zulhas Klaim Sukses Turunkan Harga Migor Jawa-Bali',
'tags': ['zulkifli hasan', 'menteri perdagangan', 'minyak goreng'],
'timestamp': 1657039548,
'upload_date': '20220705'
'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
'thumbnail': r're:https?://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169.jpeg',
'title': 'Video CNN Indonesia - VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
'age_limit': 0,
'tags': ['raja charles', ' raja charles iii', ' ratu elizabeth', ' ratu elizabeth meninggal dunia', ' raja inggris', ' inggris'],
'release_timestamp': 1662869995,
'release_date': '20220911',
'uploader': 'REUTERS'
}
}, {
# e-flash
'url': 'https://20.detik.com/e-flash/20220705-220705109/ahli-level-ppkm-jadi-payung-strategi-protokol-kesehatan',
'info_dict': {
'id': '220705109',
'ext': 'mp4',
'tags': ['ppkm jabodetabek', 'dicky budiman', 'ppkm'],
'upload_date': '20220705',
'duration': 110,
'title': 'Ahli: Level PPKM Jadi Payung Strategi Protokol Kesehatan',
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/Ahli-_Level_PPKM_Jadi_Payung_Strat_jOgUMCN-20220705182313-custom.jpg?w=650&q=80',
'description': 'md5:4eb825a9842e6bdfefd66f47b364314a',
'timestamp': 1657045255,
}
}, {
# otobuzz
# 20.detik
'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
'info_dict': {
'display_id': 'mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
'id': '220704093',
'ext': 'mp4',
'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
'timestamp': 1656951521,
'duration': 83,
'upload_date': '20220704',
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg?w=650&q=80',
'description': 'md5:9b2257341b6f375cdcf90106146d5ffb',
'thumbnail': r're:https?://cdnv\.detik\.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg',
'title': 'Mulai Rp 10 Jutaan! Ini Skema Kredit Mitsubishi Pajero Sport',
}
}, {
# sport-buzz
'url': 'https://20.detik.com/sport-buzz/20220704-220704054/crash-crash-horor-di-paruh-pertama-motogp-2022',
'info_dict': {
'id': '220704054',
'ext': 'mp4',
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/6b172c6fb564411996ea145128315630-20220704090746-0s.jpg?w=650&q=80',
'title': 'Crash-crash Horor di Paruh Pertama MotoGP 2022',
'description': 'md5:fbcc6687572ad7d16eb521b76daa50e4',
'timestamp': 1656925591,
'duration': 107,
'tags': ['marc marquez', 'fabio quartararo', 'francesco bagnaia', 'motogp crash', 'motogp 2022'],
'timestamp': 1656951521,
'upload_date': '20220704',
'duration': 83.0,
'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
'release_timestamp': 1656926321,
'release_date': '20220704',
'age_limit': 0,
'uploader': 'Ridwan Arifin ' # TODO: strip trailing whitespace at uploader
}
}, {
# adu-perspektif
'url': 'https://20.detik.com/adu-perspektif/20220518-220518144/24-tahun-reformasi-dan-alarm-demokrasi-dari-filipina',
# pasangmata.detik
'url': 'https://pasangmata.detik.com/contribution/366649',
'info_dict': {
'id': '220518144',
'id': '366649',
'ext': 'mp4',
'title': '24 Tahun Reformasi dan Alarm Demokrasi dari Filipina',
'upload_date': '20220518',
'timestamp': 1652913823,
'duration': 185.0,
'tags': ['politik', 'adu perspektif', 'indonesia', 'filipina', 'demokrasi'],
'description': 'md5:8eaaf440b839c3d02dca8c9bbbb099a9',
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/05/18/adpers_18_mei_compressed-20220518230458-custom.jpg?w=650&q=80',
'title': 'Saling Dorong Aparat dan Pendemo di Aksi Tolak Kenaikan BBM',
'description': 'md5:7a6580876c8381c454679e028620bea7',
'age_limit': 0,
'tags': 'count:17',
'thumbnail': 'https://akcdn.detik.net.id/community/data/media/thumbs-pasangmata/2022/09/08/366649-16626229351533009620.mp4-03.jpg',
}
}, {
# sosok
'url': 'https://20.detik.com/sosok/20220702-220703032/resa-boenard-si-princess-bantar-gebang',
# insertlive embed
'url': 'https://www.insertlive.com/embed/video/290482',
'info_dict': {
'id': '220703032',
'id': '290482',
'ext': 'mp4',
'timestamp': 1656824438,
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/02/SOSOK_BGBJ-20220702191138-custom.jpg?w=650&q=80',
'title': 'Resa Boenard Si \'Princess Bantar Gebang\'',
'description': 'md5:84ea66306a0285330de6a13fc6218b78',
'tags': ['sosok', 'sosok20d', 'bantar gebang', 'bgbj', 'resa boenard', 'bantar gebang bgbj', 'bgbj bantar gebang', 'sosok bantar gebang', 'sosok bgbj', 'bgbj resa boenard'],
'upload_date': '20220703',
'duration': 650,
'release_timestamp': 1663063704,
'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/13/leonardo-dicaprio_169.png?w=600&q=90',
'age_limit': 0,
'description': 'Aktor Leonardo DiCaprio memang baru saja putus dari kekasihnya yang bernama Camilla Morrone.',
'release_date': '20220913',
'title': 'Diincar Leonardo DiCaprio, Gigi Hadid Ngaku Tertarik Tapi Belum Cinta',
'tags': ['leonardo dicaprio', ' gigi hadid', ' hollywood'],
'uploader': '!nsertlive',
}
}, {
# viral
'url': 'https://20.detik.com/viral/20220603-220603135/merasakan-bus-imut-tanpa-pengemudi-muter-muter-di-kawasan-bsd-city',
# beautynesia embed
'url': 'https://www.beautynesia.id/embed/video/261636',
'info_dict': {
'id': '220603135',
'id': '261636',
'ext': 'mp4',
'description': 'md5:4771fe101aa303edb829c59c26f9e7c6',
'timestamp': 1654304305,
'title': 'Merasakan Bus Imut Tanpa Pengemudi, Muter-muter di Kawasan BSD City',
'tags': ['viral', 'autonomous vehicle', 'electric', 'shuttle bus'],
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/06/03/VIRAL_BUS_NO_SUPIR-20220604004707-custom.jpg?w=650&q=80',
'duration': 593,
'upload_date': '20220604',
'age_limit': 0,
'release_timestamp': 1662375600,
'description': 'Menurut ramalan astrologi, tiga zodiak ini bakal hoki sepanjang September 2022.',
'title': '3 Zodiak Paling Beruntung Selama September 2022',
'release_date': '20220905',
'tags': ['zodiac update', ' zodiak', ' ramalan bintang', ' zodiak beruntung 2022', ' zodiak hoki september 2022', ' zodiak beruntung september 2022'],
'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/05/3-zodiak-paling-beruntung-selama-september-2022_169.jpeg?w=600&q=90',
'uploader': 'amh',
}
}, {
# cnbcindonesia embed
'url': 'https://www.cnbcindonesia.com/embed/video/371839',
'info_dict': {
'id': '371839',
'ext': 'mp4',
'title': 'Puluhan Pejabat Rusia Tuntut Putin Mundur',
'tags': ['putin'],
'age_limit': 0,
'thumbnail': 'https://awsimages.detik.net.id/visual/2022/09/13/cnbc-indonesia-tv-3_169.png?w=600&q=80',
'description': 'md5:8b9111e37555fcd95fe549a9b4ae6fdc',
}
}, {
# detik shortlink (we can get it from https://dtk.id/?<url>)
'url': 'https://dtk.id/NkISKr',
'info_dict': {
'id': '220914049',
'ext': 'mp4',
'release_timestamp': 1663114488,
'uploader': 'Tim 20Detik',
'title': 'Pakar Bicara soal Tim Khusus Jokowi dan Mereka yang Pro ke Bjorka',
'age_limit': 0,
'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/09/14/f15cae71d7b640c58e75b254ecbb1ce1-20220914071613-0s.jpg?w=400&q=80',
'display_id': 'pakar-bicara-soal-tim-khusus-jokowi-dan-mereka-yang-pro-ke-bjorka',
'upload_date': '20220914',
'release_date': '20220914',
'description': 'md5:5eb03225f7ee40207dd3a1e18a73f1ff',
'timestamp': 1663139688,
'duration': 213.0,
'tags': ['hacker bjorka', 'bjorka', 'hacker bjorka bocorkan data rahasia presiden jokowi', 'jokowi'],
}
}]

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
json_ld_data = self._search_json_ld(webpage, display_id)
def _extract_from_webpage(self, url, webpage):
display_id = url_basename(url)
player_type, video_data = self._search_regex(
r'<script\s*[^>]+src="https?://(aws)?cdn\.detik\.net\.id/(?P<type>flowplayer|detikVideo)[^>]+>\s*(?P<video_data>{[^}]+})',
webpage, 'playerjs', group=('type', 'video_data'), default=(None, ''))

video_url = self._html_search_regex(
r'videoUrl\s*:\s*"(?P<video_url>[^"]+)', webpage, 'videoUrl')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id, ext='mp4')
json_ld_data = self._search_json_ld(webpage, display_id, default={})
extra_info_dict = {}

return merge_dicts(json_ld_data, {
'id': self._html_search_meta('video_id', webpage),
if not player_type:
return

elif player_type == 'flowplayer':
video_json_data = self._parse_json(video_data.replace('\'', '"'), display_id)
video_url = video_json_data['videoUrl']

extra_info_dict = {
'id': self._search_regex(r'identifier\s*:\s*\'([^\']+)', webpage, 'identifier'),
'thumbnail': video_json_data.get('imageUrl'),
}

elif player_type == 'detikVideo':
video_url = self._search_regex(
r'videoUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'videoUrl')
extra_info_dict = {
'id': self._html_search_meta(['video_id', 'dtk:video_id'], webpage),
'thumbnail': self._search_regex(r'imageUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'videoUrl'),
'duration': int_or_none(self._html_search_meta('duration', webpage, fatal=False, default=None)),
'release_timestamp': int_or_none(self._html_search_meta('dtk:publishdateunix', webpage, fatal=False, default=None), 1000),
'timestamp': int_or_none(self._html_search_meta('dtk:createdateunix', webpage, fatal=False, default=None), 1000),
'uploader': self._search_regex(
r'([^-]+)', self._html_search_meta('dtk:author', webpage, default='').strip(), 'uploader',
default=None)
}

formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
self._sort_formats(formats)

yield merge_dicts(json_ld_data, extra_info_dict, {
'display_id': display_id,
'title': self._html_search_meta(['og:title', 'originalTitle'], webpage) or self._html_extract_title(webpage),
'description': self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage),
'formats': formats,
'subtitles': subtitles,
'tags': str_or_none(self._html_search_meta(['keywords', 'keyword', 'dtk:keywords'], webpage), '').split(','),
'tags': try_call(lambda: self._html_search_meta(
['keywords', 'keyword', 'dtk:keywords'], webpage).split(',')),
})

@@ -907,6 +907,9 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
_TESTS = [{
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
'only_matching': True,
}]

_PRODUCT = 'dplus_us'
@@ -916,6 +919,13 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
'country': 'it',
}

def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': 'realm=%s' % realm,
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
'Authorization': self._get_auth(disco_base, display_id, realm),
})


class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)'

@@ -54,7 +54,7 @@ def _real_extract(self, url):
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)

yt_dlp/extractor/epoch.py (new file, 46 lines)
@@ -0,0 +1,46 @@
from .common import InfoExtractor


class EpochIE(InfoExtractor):
_VALID_URL = r'https?://www.theepochtimes\.com/[\w-]+_(?P<id>\d+).html'
_TESTS = [
{
'url': 'https://www.theepochtimes.com/they-can-do-audio-video-physical-surveillance-on-you-24h-365d-a-year-rex-lee-on-intrusive-apps_4661688.html',
'info_dict': {
'id': 'a3dd732c-4750-4bc8-8156-69180668bda1',
'ext': 'mp4',
'title': '‘They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year’: Rex Lee on Intrusive Apps',
}
},
{
'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html',
'info_dict': {
'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a',
'ext': 'mp4',
'title': 'The Communist Party’s Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare',
}
},
{
'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html',
'info_dict': {
'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6',
'ext': 'mp4',
'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago',
}
},
]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')

return {
'id': youmaker_video_id,
'formats': formats,
'subtitles': subtitles,
'title': self._html_extract_title(webpage)
}
yt_dlp/extractor/eurosport.py (new file, 99 lines)
@@ -0,0 +1,99 @@
from .common import InfoExtractor
from ..utils import traverse_obj


class EurosportIE(InfoExtractor):
_VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
_TESTS = [{
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
'info_dict': {
'id': '2480939',
'ext': 'mp4',
'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
'duration': 195.0,
'display_id': 'vid1694147',
'timestamp': 1654446698,
'upload_date': '20220605',
}
}, {
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
'info_dict': {
'id': '2481254',
'ext': 'mp4',
'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
'duration': 130.0,
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
'display_id': 'vid1694283',
'timestamp': 1654456090,
'upload_date': '20220605',
}
}, {
# geo-fenced, but can be bypassed via X-Forwarded-For
'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
'info_dict': {
'id': '2582552',
'ext': 'mp4',
'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
'duration': 188.0,
'display_id': 'vid1722221',
'timestamp': 1658936167,
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
'upload_date': '20220727',
}
}]

_TOKEN = None

# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
# but this method requires obtaining a sha256 hash
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not a complete list, but it should work

def _real_initialize(self):
if EurosportIE._TOKEN is None:
EurosportIE._TOKEN = self._download_json(
'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
'Trying to get token')['data']['attributes']['token']

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)

json_data = self._download_json(
f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']

json_ld_data = self._search_json_ld(webpage, display_id)

formats, subtitles = [], {}
for stream_type in json_data['attributes']['streaming']:
if stream_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
elif stream_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
elif stream_type == 'mss':
fmts, subs = self._extract_ism_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)

formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

self._sort_formats(formats)

return {
'id': json_data['id'],
'title': json_ld_data.get('title') or self._og_search_title(webpage),
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'thumbnails': json_ld_data.get('thumbnails'),
'description': (json_ld_data.get('description')
or self._html_search_meta(['og:description', 'description'], webpage)),
'duration': json_ld_data.get('duration'),
'timestamp': json_ld_data.get('timestamp'),
}
@@ -3,6 +3,9 @@

from ..utils import load_plugins

# NB: Must be before other imports so that plugins can be correctly injected
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {})

_LAZY_LOADER = False
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
with contextlib.suppress(ImportError):
@@ -19,5 +22,5 @@
]
_ALL_CLASSES.append(GenericIE)  # noqa: F405

_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
globals().update(_PLUGIN_CLASSES)
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
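
As a sketch of what this loader picks up (assuming the ytdlp_plugins layout that load_plugins scans; all names below are hypothetical): a class whose name ends in 'IE', defined in a plugin's extractor package, gets injected ahead of the built-in extractors.

# ytdlp_plugins/extractor/__init__.py  (hypothetical plugin file)
from yt_dlp.extractor.common import InfoExtractor

class MySamplePluginIE(InfoExtractor):
    _VALID_URL = r'https?://media\.example\.com/watch/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # minimal stub: hand off a direct media URL, for illustration only
        return self.url_result(f'https://media.example.com/{video_id}.mp4', video_id=video_id)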
@@ -3,7 +3,6 @@
import urllib.parse
import xml.etree.ElementTree

from . import gen_extractor_classes
from .common import InfoExtractor  # isort: split
from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE
from .commonprotocols import RtmpIE
@@ -26,6 +25,7 @@
parse_resolution,
smuggle_url,
str_or_none,
traverse_obj,
try_call,
unescapeHTML,
unified_timestamp,
@@ -873,22 +873,6 @@ class GenericIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
},
# Wistia embed
{
'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
'info_dict': {
'id': '6e2wtrbdaf',
'ext': 'mov',
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
'description': 'a Paywall Videos video from Remilon',
'duration': 644.072,
'uploader': 'study.com',
'timestamp': 1459678540,
'upload_date': '20160403',
'filesize': 24687186,
},
},
# Wistia standard embed (async)
{
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
@@ -903,7 +887,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'webpage 404 not found',
},
# Soundcloud embed
{
@@ -1086,18 +1071,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20160719',
'timestamp': 468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
},
'add_ie': ['JWPlatform'],
},
{
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
@@ -2621,10 +2594,10 @@ def _real_extract(self, url):
default_search += ':'
return self.url_result(default_search + url)

url, smuggled_data = unsmuggle_url(url)
url, smuggled_data = unsmuggle_url(url, {})
force_videoid = None
is_intentional = smuggled_data and smuggled_data.get('to_generic')
if smuggled_data and 'force_videoid' in smuggled_data:
is_intentional = smuggled_data.get('to_generic')
if 'force_videoid' in smuggled_data:
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
@@ -2638,7 +2611,10 @@ def _real_extract(self, url):
# to accept raw bytes and being able to download only a chunk.
# It may probably be better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers={'Accept-Encoding': '*'})
full_response = self._request_webpage(url, video_id, headers={
'Accept-Encoding': '*',
**smuggled_data.get('http_headers', {})
})
new_url = full_response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
@@ -2657,14 +2633,15 @@ def _real_extract(self, url):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
headers = smuggled_data.get('http_headers', {})
format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
formats = self._extract_f4m_formats(url, video_id, headers=headers)
else:
formats = [{
'format_id': format_id,
@@ -2673,8 +2650,11 @@ def _real_extract(self, url):
}]
info_dict['direct'] = True
self._sort_formats(formats)
info_dict['formats'] = formats
info_dict['subtitles'] = subtitles
info_dict.update({
'formats': formats,
'subtitles': subtitles,
'http_headers': headers,
})
return info_dict

if not self.get_param('test', False) and not is_intentional:
@@ -2765,7 +2745,7 @@ def _real_extract(self, url):
'age_limit': self._rta_search(webpage),
})

domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)

# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
@@ -2805,7 +2785,7 @@ def _real_extract(self, url):

self._downloader.write_debug('Looking for embeds')
embeds = []
for ie in gen_extractor_classes():
for ie in self._downloader._ies.values():
gen = ie.extract_from_webpage(self._downloader, url, webpage)
current_embeds = []
try:
@@ -2840,8 +2820,9 @@ def _real_extract(self, url):
try:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
self.report_detected('JW Player data')
return merge_dicts(info, info_dict)
if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
self.report_detected('JW Player data')
return merge_dicts(info, info_dict)
except ExtractorError:
# See https://github.com/ytdl-org/youtube-dl/pull/16735
pass
@@ -2917,8 +2898,12 @@ def _real_extract(self, url):
if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
return merge_dicts({
'_type': 'url_transparent',
'url': smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True}),
'_type': 'video' if json_ld.get('ext') else 'url_transparent',
'url': smuggle_url(json_ld['url'], {
'force_videoid': video_id,
'to_generic': True,
'http_headers': {'Referer': url},
}),
}, json_ld, info_dict)

def check_video(vurl):

yt_dlp/extractor/goplay.py (new file, 395 lines)
@@ -0,0 +1,395 @@
import base64
import binascii
import datetime
import hashlib
import hmac
import json
import os

from .common import InfoExtractor
from ..utils import (
ExtractorError,
traverse_obj,
unescapeHTML,
)


class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'

_NETRC_MACHINE = 'goplay'

_TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
'ext': 'mp4',
'title': 'S3 - Aflevering 2',
'series': 'De Container Cup',
'season': 'Season 3',
'season_number': 3,
'episode': 'Episode 2',
'episode_number': 2,
},
'skip': 'This video is only available for registered users'
}, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
'ext': 'mp4',
'title': 'A Family for the Holidays',
},
'skip': 'This video is only available for registered users'
}]

_id_token = None

def _perform_login(self, username, password):
self.report_login()
aws = AwsIdp(ie=self, pool_id='eu-west-1_dViSsKM5Y', client_id='6s1h851s8uplco5h6mqh1jac8m')
self._id_token, _ = aws.authenticate(username=username, password=password)

def _real_initialize(self):
if not self._id_token:
raise self.raise_login_required(method='password')

def _real_extract(self, url):
url, display_id = self._match_valid_url(url).group(0, 'display_id')
webpage = self._download_webpage(url, display_id)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')

movie = video_data.get('movie')
if movie:
video_id = movie['videoUuid']
info_dict = {
'title': movie.get('title')
}
else:
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
video_id = episode['videoUuid']
info_dict = {
'title': episode.get('episodeTitle'),
'series': traverse_obj(episode, ('program', 'title')),
'season_number': episode.get('seasonNumber'),
'episode_number': episode.get('episodeNumber'),
}

api = self._download_json(
f'https://api.viervijfzes.be/content/{video_id}',
video_id, headers={'Authorization': self._id_token})

formats, subs = self._extract_m3u8_formats_and_subtitles(
api['video']['S'], video_id, ext='mp4', m3u8_id='HLS')
self._sort_formats(formats)

info_dict.update({
'id': video_id,
'formats': formats,
})

return info_dict


# Taken from https://github.com/add-ons/plugin.video.viervijfzes/blob/master/resources/lib/viervijfzes/auth_awsidp.py
# Released into Public domain by https://github.com/michaelarnauts

class InvalidLoginException(ExtractorError):
""" The login credentials are invalid """


class AuthenticationException(ExtractorError):
""" Something went wrong while logging in """


class AwsIdp:
""" AWS Identity Provider """

def __init__(self, ie, pool_id, client_id):
"""
:param InfoExtractor ie: The extractor that instantiated this class.
:param str pool_id: The AWS user pool to connect to (format: <region>_<poolid>).
E.g.: eu-west-1_aLkOfYN3T
:param str client_id: The client application ID (the ID of the application connecting)
"""

self.ie = ie

self.pool_id = pool_id
if "_" not in self.pool_id:
raise ValueError("Invalid pool_id format. Should be <region>_<poolid>.")

self.client_id = client_id
self.region = self.pool_id.split("_")[0]
self.url = "https://cognito-idp.%s.amazonaws.com/" % (self.region,)

# Initialize the values
# https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22
self.n_hex = 'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1' + \
'29024E088A67CC74020BBEA63B139B22514A08798E3404DD' + \
'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245' + \
'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED' + \
'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D' + \
'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F' + \
'83655D23DCA3AD961C62F356208552BB9ED529077096966D' + \
'670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B' + \
'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9' + \
'DE2BCBF6955817183995497CEA956AE515D2261898FA0510' + \
'15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64' + \
'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7' + \
'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B' + \
'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C' + \
'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31' + \
'43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF'

# https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49
self.g_hex = '2'
self.info_bits = bytearray('Caldera Derived Key', 'utf-8')

self.big_n = self.__hex_to_long(self.n_hex)
self.g = self.__hex_to_long(self.g_hex)
self.k = self.__hex_to_long(self.__hex_hash('00' + self.n_hex + '0' + self.g_hex))
self.small_a_value = self.__generate_random_small_a()
self.large_a_value = self.__calculate_a()

def authenticate(self, username, password):
""" Authenticate with a username and password. """
# Step 1: First initiate an authentication request
auth_data_dict = self.__get_authentication_request(username)
auth_data = json.dumps(auth_data_dict).encode("utf-8")
auth_headers = {
"X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth",
"Accept-Encoding": "identity",
"Content-Type": "application/x-amz-json-1.1"
}
auth_response_json = self.ie._download_json(
self.url, None, data=auth_data, headers=auth_headers,
note='Authenticating username', errnote='Invalid username')
challenge_parameters = auth_response_json.get("ChallengeParameters")

if auth_response_json.get("ChallengeName") != "PASSWORD_VERIFIER":
raise AuthenticationException(auth_response_json["message"])

# Step 2: Respond to the Challenge with a valid ChallengeResponse
challenge_request = self.__get_challenge_response_request(challenge_parameters, password)
challenge_data = json.dumps(challenge_request).encode("utf-8")
challenge_headers = {
"X-Amz-Target": "AWSCognitoIdentityProviderService.RespondToAuthChallenge",
"Content-Type": "application/x-amz-json-1.1"
}
auth_response_json = self.ie._download_json(
self.url, None, data=challenge_data, headers=challenge_headers,
note='Authenticating password', errnote='Invalid password')

if 'message' in auth_response_json:
raise InvalidLoginException(auth_response_json['message'])
return (
auth_response_json['AuthenticationResult']['IdToken'],
auth_response_json['AuthenticationResult']['RefreshToken']
)

def __get_authentication_request(self, username):
"""

:param str username: The username to use

:return: A full Authorization request.
:rtype: dict
"""
auth_request = {
"AuthParameters": {
"USERNAME": username,
"SRP_A": self.__long_to_hex(self.large_a_value)
},
"AuthFlow": "USER_SRP_AUTH",
"ClientId": self.client_id
}
return auth_request

def __get_challenge_response_request(self, challenge_parameters, password):
""" Create a Challenge Response Request object.

:param dict[str,str|int] challenge_parameters: The parameters for the challenge.
:param str password: The password.

:return: A valid and full request data object to use as a response for a challenge.
:rtype: dict
"""
user_id = challenge_parameters["USERNAME"]
user_id_for_srp = challenge_parameters["USER_ID_FOR_SRP"]
srp_b = challenge_parameters["SRP_B"]
salt = challenge_parameters["SALT"]
secret_block = challenge_parameters["SECRET_BLOCK"]

timestamp = self.__get_current_timestamp()

# Get a HKDF key for the password, SrpB and the Salt
hkdf = self.__get_hkdf_key_for_password(
user_id_for_srp,
password,
self.__hex_to_long(srp_b),
salt
)
secret_block_bytes = base64.standard_b64decode(secret_block)

# the message is a combo of the pool_id, provided SRP userId, the Secret and Timestamp
msg = \
bytearray(self.pool_id.split('_')[1], 'utf-8') + \
bytearray(user_id_for_srp, 'utf-8') + \
bytearray(secret_block_bytes) + \
bytearray(timestamp, 'utf-8')
hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256)
signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8')
challenge_request = {
"ChallengeResponses": {
"USERNAME": user_id,
"TIMESTAMP": timestamp,
"PASSWORD_CLAIM_SECRET_BLOCK": secret_block,
"PASSWORD_CLAIM_SIGNATURE": signature_string
},
"ChallengeName": "PASSWORD_VERIFIER",
"ClientId": self.client_id
}
return challenge_request

def __get_hkdf_key_for_password(self, username, password, server_b_value, salt):
""" Calculates the final hkdf based on computed S value, and computed U value and the key.

:param str username: Username.
:param str password: Password.
:param int server_b_value: Server B value.
:param int salt: Generated salt.

:return Computed HKDF value.
:rtype: object
"""

u_value = self.__calculate_u(self.large_a_value, server_b_value)
if u_value == 0:
raise ValueError('U cannot be zero.')
username_password = '%s%s:%s' % (self.pool_id.split('_')[1], username, password)
username_password_hash = self.__hash_sha256(username_password.encode('utf-8'))

x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash))
g_mod_pow_xn = pow(self.g, x_value, self.big_n)
int_value2 = server_b_value - self.k * g_mod_pow_xn
s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n)
hkdf = self.__compute_hkdf(
bytearray.fromhex(self.__pad_hex(s_value)),
bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value)))
)
return hkdf

def __compute_hkdf(self, ikm, salt):
""" Standard hkdf algorithm

:param {Buffer} ikm Input key material.
:param {Buffer} salt Salt value.
:return {Buffer} Strong key material.
"""

prk = hmac.new(salt, ikm, hashlib.sha256).digest()
info_bits_update = self.info_bits + bytearray(chr(1), 'utf-8')
hmac_hash = hmac.new(prk, info_bits_update, hashlib.sha256).digest()
return hmac_hash[:16]

def __calculate_u(self, big_a, big_b):
""" Calculate the client's value U which is the hash of A and B

:param int big_a: Large A value.
:param int big_b: Server B value.

:return Computed U value.
:rtype: int
"""

u_hex_hash = self.__hex_hash(self.__pad_hex(big_a) + self.__pad_hex(big_b))
return self.__hex_to_long(u_hex_hash)

def __generate_random_small_a(self):
""" Helper function to generate a random big integer

:return a random value.
:rtype: int
"""
random_long_int = self.__get_random(128)
return random_long_int % self.big_n

def __calculate_a(self):
""" Calculate the client's public value A = g^a%N with the generated random number a

:return Computed large A.
:rtype: int
"""

big_a = pow(self.g, self.small_a_value, self.big_n)
# safety check
if (big_a % self.big_n) == 0:
raise ValueError('Safety check for A failed')
return big_a

@staticmethod
def __long_to_hex(long_num):
return '%x' % long_num

@staticmethod
def __hex_to_long(hex_string):
return int(hex_string, 16)

@staticmethod
def __hex_hash(hex_string):
return AwsIdp.__hash_sha256(bytearray.fromhex(hex_string))

@staticmethod
def __hash_sha256(buf):
"""AuthenticationHelper.hash"""
digest = hashlib.sha256(buf).hexdigest()
return (64 - len(digest)) * '0' + digest

@staticmethod
def __pad_hex(long_int):
""" Converts a Long integer (or hex string) to hex format padded with zeroes for hashing

:param int|str long_int: Number or string to pad.

:return Padded hex string.
:rtype: str
"""

if not isinstance(long_int, str):
hash_str = AwsIdp.__long_to_hex(long_int)
else:
hash_str = long_int
if len(hash_str) % 2 == 1:
hash_str = '0%s' % hash_str
elif hash_str[0] in '89ABCDEFabcdef':
hash_str = '00%s' % hash_str
return hash_str

@staticmethod
def __get_random(nbytes):
random_hex = binascii.hexlify(os.urandom(nbytes))
return AwsIdp.__hex_to_long(random_hex)

@staticmethod
def __get_current_timestamp():
""" Creates a timestamp with the correct English format.

:return: timestamp in format 'Sun Jan 27 19:00:04 UTC 2019'
:rtype: str
"""

# We need US only data, so we cannot just do a strftime:
# Sun Jan 27 19:00:04 UTC 2019
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

time_now = datetime.datetime.utcnow()
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
time_string = datetime.datetime.utcnow().strftime(format_string)
return time_string

def __str__(self):
return "AWS IDP Client for:\nRegion: %s\nPoolId: %s\nAppId: %s" % (
self.region, self.pool_id.split("_")[1], self.client_id
)
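
For reference, a minimal sketch of the SRP math that the AwsIdp class above implements, written out from the method bodies (H is SHA-256 over padded hex strings, as in __hex_hash/__pad_hex; k = H(N + g), from __init__):

# A   = pow(g, a, N)                                   # __calculate_a
# u   = H(pad(A) + pad(B))                             # __calculate_u
# x   = H(pad(salt) + H(pool + user + ':' + password))
# S   = pow(B - k * pow(g, x, N), a + u * x, N)        # __get_hkdf_key_for_password
# key = HKDF(ikm=S, salt=u)[:16]                       # __compute_hkdf
# PASSWORD_CLAIM_SIGNATURE = HMAC-SHA256(key, pool_id + user_id + secret_block + timestamp)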
@@ -1,10 +1,12 @@
import urllib.parse

from .common import InfoExtractor
from .kaltura import KalturaIE
from .youtube import YoutubeIE
from ..utils import (
NO_DEFAULT,
determine_ext,
int_or_none,
NO_DEFAULT,
parse_iso8601,
smuggle_url,
xpath_text,
@@ -23,6 +25,9 @@ class HeiseIE(InfoExtractor):
'timestamp': 1512734959,
'upload_date': '20171208',
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'duration': 2845,
'view_count': int,
},
'params': {
'skip_download': True,
@@ -34,11 +39,27 @@ class HeiseIE(InfoExtractor):
'info_dict': {
'id': '6kmWbXleKW4',
'ext': 'mp4',
'title': 'NEU IM SEPTEMBER | Netflix',
'description': 'md5:2131f3c7525e540d5fd841de938bd452',
'title': 'Neu im September 2017 | Netflix',
'description': 'md5:d6852d1f96bb80760608eed3b907437c',
'upload_date': '20170830',
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
'uploader_id': 'netflixdach',
'categories': ['Entertainment'],
'tags': 'count:27',
'age_limit': 0,
'availability': 'public',
'comment_count': int,
'channel_id': 'UCZqgRlLcvO3Fnx_npQJygcQ',
'thumbnail': 'https://i.ytimg.com/vi_webp/6kmWbXleKW4/maxresdefault.webp',
'uploader_url': 'http://www.youtube.com/user/netflixdach',
'playable_in_embed': True,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCZqgRlLcvO3Fnx_npQJygcQ',
'view_count': int,
'channel': 'Netflix Deutschland, Österreich und Schweiz',
'channel_follower_count': int,
'like_count': int,
'duration': 67,
},
'params': {
'skip_download': True,
@@ -52,11 +73,15 @@ class HeiseIE(InfoExtractor):
'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
'timestamp': 1512470717,
'upload_date': '20171205',
'duration': 786,
'view_count': int,
'thumbnail': 're:^https?://.*/thumbnail/.*',
},
'params': {
'skip_download': True,
},
}, {
# FIXME: Video m3u8 fails to download; issue with Kaltura extractor
'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
'info_dict': {
'id': '1_59mk80sf',
@@ -69,6 +94,18 @@ class HeiseIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# videout
'url': 'https://www.heise.de/ct/artikel/c-t-uplink-3-8-Anonyme-SIM-Karten-G-Sync-Monitore-Citizenfour-2440327.html',
'info_dict': {
'id': '2440327',
'ext': 'mp4',
'title': 'c\'t uplink 3.8: Anonyme SIM-Karten, G-Sync-Monitore, Citizenfour',
'thumbnail': 'http://www.heise.de/imagine/yxM2qmol0xV3iFB7qFb70dGvXjc/gallery/',
'description': 'md5:fa164d8c8707dff124a9626d39205f5d',
'timestamp': 1414825200,
'upload_date': '20141101',
}
}, {
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
'only_matching': True,
@@ -127,20 +164,22 @@ def _make_kaltura_result(kaltura_url):
yt_urls, video_id, title, ie=YoutubeIE.ie_key())

title = extract_title()
api_params = urllib.parse.parse_qs(
self._search_regex(r'/videout/feed\.json\?([^\']+)', webpage, 'feed params', default=None) or '')
if not api_params or 'container' not in api_params or 'sequenz' not in api_params:
container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')

container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')

sequenz_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')

doc = self._download_xml(
'http://www.heise.de/videout/feed', video_id, query={
sequenz_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
api_params = {
'container': container_id,
'sequenz': sequenz_id,
})
}
doc = self._download_xml(
'http://www.heise.de/videout/feed', video_id, query=api_params)

formats = []
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):

@@ -6,7 +6,7 @@
class HolodexIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.|staging\.)?holodex\.net/(?:
api/v2/playlist/(?P<playlist>\d+)|
watch/(?P<id>\w+)(?:\?(?:[^#]+&)?playlist=(?P<playlist2>\d+))?
watch/(?P<id>[\w-]{11})(?:\?(?:[^#]+&)?playlist=(?P<playlist2>\d+))?
)'''
_TESTS = [{
'url': 'https://holodex.net/watch/9kQ2GtvDV3s',
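Side note on the Holodex pattern change above: `\w+` will happily match a partial or malformed ID, while `[\w-]{11}` pins the capture to eleven-character YouTube-style IDs, hyphens included. A quick standalone check with the stdlib (sample paths invented):

    import re

    OLD = re.compile(r'watch/(?P<id>\w+)')
    NEW = re.compile(r'watch/(?P<id>[\w-]{11})')

    for path in ('watch/9kQ2GtvDV3s', 'watch/9kQ2-tvDV3s', 'watch/abc'):
        old_m, new_m = OLD.match(path), NEW.match(path)
        # the old pattern stops at '-' (captures only '9kQ2') and accepts 'abc';
        # the new one requires exactly 11 ID characters and allows '-'
        print(path, old_m and old_m.group('id'), new_m and new_m.group('id'))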
@@ -1,14 +1,19 @@
import json
import re

from ..utils import int_or_none, unified_timestamp, unescapeHTML
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
try_call,
unescapeHTML,
unified_timestamp,
)


class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',
@@ -21,10 +26,11 @@ class HRFernsehenIE(InfoExtractor):
'subtitles': {'de': [{
'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
}]},
'timestamp': 1598470200,
'timestamp': 1598400000,
'upload_date': '20200826',
'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
'title': 'hessenschau vom 26.08.2020'
'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
'title': 'hessenschau vom 26.08.2020',
'duration': 1654
}
}, {
'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
@@ -33,25 +39,18 @@ class HRFernsehenIE(InfoExtractor):

_GEO_COUNTRIES = ['DE']

def extract_airdate(self, loader_data):
airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')

if airdate_str is None:
return None

return unified_timestamp(airdate_str)

def extract_formats(self, loader_data):
stream_formats = []
for stream_obj in loader_data["videoResolutionLevels"]:
data = loader_data['mediaCollection']['streams'][0]['media']
for inner in data[1:]:
stream_format = {
'format_id': str(stream_obj['verticalResolution']) + "p",
'height': stream_obj['verticalResolution'],
'url': stream_obj['url'],
'format_id': try_call(lambda: f'{inner["maxHResolutionPx"]}p'),
'height': inner.get('maxHResolutionPx'),
'url': inner['url'],
}

quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
stream_obj['url'])
inner['url'])
if quality_information:
stream_format['width'] = int_or_none(quality_information.group(1))
stream_format['height'] = int_or_none(quality_information.group(2))
@@ -72,22 +71,22 @@ def _real_extract(self, url):
description = self._html_search_meta(
['description'], webpage)

loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
loader_str = unescapeHTML(self._search_regex(r"data-(?:new-)?hr-mediaplayer-loader='([^']*)'", webpage, 'ardloader'))
loader_data = json.loads(loader_str)

subtitle = traverse_obj(loader_data, ('mediaCollection', 'subTitles', 0, 'sources', 0, 'url'))

info = {
'id': video_id,
'title': title,
'description': description,
'formats': self.extract_formats(loader_data),
'timestamp': self.extract_airdate(loader_data)
'subtitles': {'de': [{'url': subtitle}]},
'timestamp': unified_timestamp(self._search_regex(
r'<time\sdatetime="(\d{4}\W\d{1,2}\W\d{1,2})', webpage, 'datetime', fatal=False)),
'duration': int_or_none(traverse_obj(
loader_data, ('playerConfig', 'pluginData', 'trackingAti@all', 'richMedia', 'duration'))),
'thumbnail': self._search_regex(r'thumbnailUrl\W*([^"]+)', webpage, 'thumbnail', default=None),
}

if "subtitle" in loader_data:
info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}

thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
if len(thumbnails) > 0:
info["thumbnails"] = [{"url": t} for t in thumbnails]

return info
@@ -20,15 +20,17 @@ class HungamaIE(InfoExtractor):
'''
_TESTS = [{
'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
'md5': '687c5f1e9f832f3b59f44ed0eb1f120a',
'info_dict': {
'id': '2931166',
'id': '39349649',
'ext': 'mp4',
'title': 'Lucky Ali - Kitni Haseen Zindagi',
'track': 'Kitni Haseen Zindagi',
'artist': 'Lucky Ali',
'album': 'Aks',
'release_year': 2000,
'title': 'Krishna Chants',
'description': 'Watch Krishna Chants video now. You can also watch other latest videos only at Hungama',
'upload_date': '20180829',
'duration': 264,
'timestamp': 1535500800,
'view_count': int,
'thumbnail': 'https://images.hungama.com/c/1/0dc/2ca/39349649/39349649_700x394.jpg',
}
}, {
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
@@ -40,12 +42,7 @@ class HungamaIE(InfoExtractor):

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

info = self._search_json_ld(webpage, video_id)

m3u8_url = self._download_json(
video_json = self._download_json(
'https://www.hungama.com/index.php', video_id,
data=urlencode_postdata({'content_id': video_id}), headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
@@ -53,18 +50,25 @@ def _real_extract(self, url):
}, query={
'c': 'common',
'm': 'get_video_mdn_url',
})['stream_url']
})

formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
self._sort_formats(formats)

info.update({
json_ld = self._search_json_ld(
self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)

return {
**json_ld,
'id': video_id,
'formats': formats,
})
return info
'subtitles': {
'en': [{
'url': video_json['sub_title'],
'ext': 'vtt',
}]
} if video_json.get('sub_title') else None,
}


class HungamaSongIE(InfoExtractor):
@@ -6,7 +6,6 @@
from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
str_or_none,
try_get,
unescapeHTML,
@@ -55,11 +54,7 @@ class HuyaLiveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id=video_id)
json_stream = self._search_regex(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage, 'stream', default=None)
if not json_stream:
raise ExtractorError('Video is offline', expected=True)
stream_data = self._parse_json(compat_b64decode(json_stream).decode(), video_id=video_id,
transform_source=js_to_json)
stream_data = self._search_json(r'stream:\s', webpage, 'stream', video_id=video_id, default=None)
room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
if not room_info:
raise ExtractorError('Can not extract the room info', expected=True)
@@ -67,6 +62,8 @@ def _real_extract(self, url):
screen_type = room_info.get('screenType')
live_source_type = room_info.get('liveSourceType')
stream_info_list = stream_data['data'][0]['gameStreamInfoList']
if not stream_info_list:
raise ExtractorError('Video is offline', expected=True)
formats = []
for stream_info in stream_info_list:
stream_url = stream_info.get('sFlvUrl')
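The Huya hunk above replaces a hand-rolled regex-plus-base64 decode with yt-dlp's `_search_json` helper. For reference, a minimal stdlib-only sketch of the pattern being retired (the page snippet is invented for illustration):

    import base64
    import json
    import re

    # hypothetical page fragment shaped like what the old regex expected
    blob = base64.b64encode(json.dumps(
        {'data': [{'gameLiveInfo': {'nick': 'example'}}]}).encode()).decode()
    webpage = '... "stream": "%s" ...' % blob

    encoded = re.search(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage).group(1)
    stream_data = json.loads(base64.b64decode(encoded))
    print(stream_data['data'][0]['gameLiveInfo']['nick'])  # -> example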
yt_dlp/extractor/iltalehti.py (new file, 51 lines)
@@ -0,0 +1,51 @@
from .common import InfoExtractor
from ..utils import js_to_json, traverse_obj


class IltalehtiIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?iltalehti\.fi/[^/?#]+/a/(?P<id>[^/?#])'
_TESTS = [
# jwplatform embed main_media
{
'url': 'https://www.iltalehti.fi/ulkomaat/a/9fbd067f-94e4-46cd-8748-9d958eb4dae2',
'md5': 'af12d42c539f1f49f0b62d231fe72dcd',
'info_dict': {
'id': 'gYjjaf1L',
'ext': 'mp4',
'title': 'Sensuroimaton Päivärinta, jakso 227: Vieraana Suomen Venäjän ex-suurlähettiläs René Nyberg ja Kenraalimajuri evp Pekka Toveri',
'description': '',
'upload_date': '20220928',
'timestamp': 1664360878,
'duration': 2089,
'thumbnail': r're:^https?://.*\.jpg',
},
},
# jwplatform embed body
{
'url': 'https://www.iltalehti.fi/politiikka/a/1ce49d85-1670-428b-8db8-d2479b9950a4',
'md5': '9e50334b8f8330ce8828b567a82a3c65',
'info_dict': {
'id': '18R6zkLi',
'ext': 'mp4',
'title': 'Pekka Toverin arvio: Näin Nord Stream -kaasuputken räjäyttäminen on saatettu toteuttaa',
'description': 'md5:3d1302c9e17e7ffd564143ff58f8de35',
'upload_date': '20220929',
'timestamp': 1664435867,
'duration': 165.0,
'thumbnail': r're:^https?://.*\.jpg',
},
},
]

def _real_extract(self, url):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
info = self._search_json(
r'<script>\s*window.App\s*=', webpage, 'json', article_id,
transform_source=js_to_json)
props = traverse_obj(info, (
'state', 'articles', ..., 'items', (('main_media', 'properties'), ('body', ..., 'properties'))))
video_ids = traverse_obj(props, (lambda _, v: v['provider'] == 'jwplayer', 'id'))
return self.playlist_from_matches(
video_ids, article_id, ie='JWPlatform', getter=lambda id: f'jwplatform:{id}',
title=traverse_obj(info, ('state', 'articles', ..., 'items', 'canonical_title'), get_all=False))
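The `traverse_obj` call in the new Iltalehti extractor above branches over every article item, collecting `main_media` properties and each `body` entry's properties, then filters for jwplayer providers. A rough stdlib-only equivalent of what that one-liner gathers, with invented input data:

    # invented, minimal window.App-shaped structure
    info = {'state': {'articles': [{'items': [
        {'main_media': {'properties': {'provider': 'jwplayer', 'id': 'gYjjaf1L'}}},
        {'body': [{'properties': {'provider': 'image', 'id': 'img1'}},
                  {'properties': {'provider': 'jwplayer', 'id': '18R6zkLi'}}]},
    ]}]}}

    video_ids = []
    for article in info['state']['articles']:
        for item in article['items']:
            candidates = [item.get('main_media', {}).get('properties')]
            candidates += [e.get('properties') for e in item.get('body', [])]
            for props in candidates:
                if props and props.get('provider') == 'jwplayer':
                    video_ids.append(props['id'])

    print(video_ids)  # -> ['gYjjaf1L', '18R6zkLi']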
@@ -173,18 +173,9 @@ def _extract_product(self, product_info):
if isinstance(product_info, list):
product_info = product_info[0]

comment_data = traverse_obj(product_info, ('edge_media_to_parent_comment', 'edges'))
comments = [{
'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
'id': traverse_obj(comment_dict, ('node', 'id')),
'text': traverse_obj(comment_dict, ('node', 'text')),
'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
} for comment_dict in comment_data] if comment_data else None

user_info = product_info.get('user') or {}
info_dict = {
'id': product_info.get('code') or _pk_to_id(product_info.get('pk')),
'id': _pk_to_id(traverse_obj(product_info, 'pk', 'id', expected_type=str_or_none)[:19]),
'title': product_info.get('title') or f'Video by {user_info.get("username")}',
'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
'timestamp': int_or_none(product_info.get('taken_at')),
@@ -194,7 +185,7 @@ def _extract_product(self, product_info):
'view_count': int_or_none(product_info.get('view_count')),
'like_count': int_or_none(product_info.get('like_count')),
'comment_count': int_or_none(product_info.get('comment_count')),
'comments': comments,
'__post_extractor': self.extract_comments(_pk_to_id(product_info.get('pk'))),
'http_headers': {
'Referer': 'https://www.instagram.com/',
}
@@ -216,6 +207,23 @@ def _extract_product(self, product_info):
**self._extract_product_media(product_info)
}

def _get_comments(self, video_id):
comments_info = self._download_json(
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._API_HEADERS) or {}

comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
for comment_dict in comment_data or []:
yield {
'author': traverse_obj(comment_dict, ('node', 'owner', 'username'), ('user', 'username')),
'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id'), ('user', 'pk')),
'author_thumbnail': traverse_obj(comment_dict, ('node', 'owner', 'profile_pic_url'), ('user', 'profile_pic_url'), expected_type=url_or_none),
'id': traverse_obj(comment_dict, ('node', 'id'), 'pk'),
'text': traverse_obj(comment_dict, ('node', 'text'), 'text'),
'like_count': traverse_obj(comment_dict, ('node', 'edge_liked_by', 'count'), 'comment_like_count', expected_type=int_or_none),
'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), 'created_at', expected_type=int_or_none),
}


class InstagramIOSIE(InfoExtractor):
IE_DESC = 'IOS instagram:// URL'
@@ -258,7 +266,7 @@ class InstagramIE(InstagramBaseIE):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 0,
'duration': 8.747,
'timestamp': 1371748545,
'upload_date': '20130620',
'uploader_id': '2815873',
@@ -268,27 +276,34 @@ class InstagramIE(InstagramBaseIE):
'comment_count': int,
'comments': list,
},
'expected_warnings': [
'General metadata extraction failed',
'Main webpage is locked behind the login page',
],
}, {
# missing description
'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
# reel
'url': 'https://www.instagram.com/reel/Chunk8-jurw/',
'md5': 'f6d8277f74515fa3ff9f5791426e42b1',
'info_dict': {
'id': 'BA-pQFBG8HZ',
'id': 'Chunk8-jurw',
'ext': 'mp4',
'title': 'Video by britneyspears',
'title': 'Video by instagram',
'description': 'md5:c9cde483606ed6f80fbe9283a6a2b290',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 0,
'timestamp': 1453760977,
'upload_date': '20160125',
'uploader_id': '12246775',
'uploader': 'Britney Spears',
'channel': 'britneyspears',
'duration': 5.016,
'timestamp': 1661529231,
'upload_date': '20220826',
'uploader_id': '25025320',
'uploader': 'Instagram',
'channel': 'instagram',
'like_count': int,
'comment_count': int,
'comments': list,
},
'params': {
'skip_download': True,
},
'expected_warnings': [
'General metadata extraction failed',
'Main webpage is locked behind the login page',
],
}, {
# multi video post
'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
@@ -297,18 +312,24 @@ class InstagramIE(InstagramBaseIE):
'id': 'BQ0dSaohpPW',
'ext': 'mp4',
'title': 'Video 1',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
},
}, {
'info_dict': {
'id': 'BQ0dTpOhuHT',
'ext': 'mp4',
'title': 'Video 2',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
},
}, {
'info_dict': {
'id': 'BQ0dT7RBFeF',
'ext': 'mp4',
'title': 'Video 3',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
},
}],
'info_dict': {
@@ -316,6 +337,10 @@ class InstagramIE(InstagramBaseIE):
'title': 'Post by instagram',
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
},
'expected_warnings': [
'General metadata extraction failed',
'Main webpage is locked behind the login page',
],
}, {
# IGTV
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
@@ -334,7 +359,11 @@ class InstagramIE(InstagramBaseIE):
'comment_count': int,
'comments': list,
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
}
},
'expected_warnings': [
'General metadata extraction failed',
'Main webpage is locked behind the login page',
],
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
@@ -367,6 +396,15 @@ def _real_extract(self, url):
video_id, url = self._match_valid_url(url).group('id', 'url')
media, webpage = {}, ''

if self._get_cookies(url).get('sessionid'):
info = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
fatal=False, errnote='Video info extraction failed',
note='Downloading video info', headers=self._API_HEADERS), ('items', 0))
if info:
media.update(info)
return self._extract_product(media)

api_check = self._download_json(
f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
@@ -374,40 +412,32 @@ def _real_extract(self, url):

if not csrf_token:
self.report_warning('No csrf token set by Instagram API', video_id)
elif api_check.get('status') != 'ok':
self.report_warning('Instagram API is not granting access', video_id)
else:
if self._get_cookies(url).get('sessionid'):
media.update(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
fatal=False, note='Downloading video info', headers={
**self._API_HEADERS,
'X-CSRFToken': csrf_token.value,
}), ('items', 0)) or {})
if media:
return self._extract_product(media)
csrf_token = csrf_token.value if api_check.get('status') == 'ok' else None
if not csrf_token:
self.report_warning('Instagram API is not granting access', video_id)

variables = {
'shortcode': video_id,
'child_comment_count': 3,
'fetch_comment_count': 40,
'parent_comment_count': 24,
'has_threaded_comments': True,
}
general_info = self._download_json(
'https://www.instagram.com/graphql/query/', video_id, fatal=False,
headers={
**self._API_HEADERS,
'X-CSRFToken': csrf_token.value,
'X-Requested-With': 'XMLHttpRequest',
'Referer': url,
}, query={
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
'variables': json.dumps(variables, separators=(',', ':')),
})
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
variables = {
'shortcode': video_id,
'child_comment_count': 3,
'fetch_comment_count': 40,
'parent_comment_count': 24,
'has_threaded_comments': True,
}
general_info = self._download_json(
'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
headers={
**self._API_HEADERS,
'X-CSRFToken': csrf_token or '',
'X-Requested-With': 'XMLHttpRequest',
'Referer': url,
}, query={
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
'variables': json.dumps(variables, separators=(',', ':')),
})
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})

if not media:
if not general_info:
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
shared_data = self._search_json(
@@ -418,12 +448,12 @@ def _real_extract(self, url):
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
else:
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage')
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
webpage = self._download_webpage(
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
additional_data = self._search_json(
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*', webpage, 'additional data', video_id, fatal=False)
if not additional_data:
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
if not additional_data and not media:
self.raise_login_required('Requested content is not available, rate-limit reached or login required')

product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
yt_dlp/extractor/islamchannel.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import re

from .common import InfoExtractor
from ..utils import traverse_obj, urljoin


class IslamChannelIE(InfoExtractor):
_VALID_URL = r'https?://watch\.islamchannel\.tv/watch/(?P<id>\d+)'
_TESTS = [{
'url': 'https://watch.islamchannel.tv/watch/38604310',
'info_dict': {
'id': '38604310',
'title': 'Omar - Young Omar',
'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55',
'thumbnail': r're:https?://.+',
'ext': 'mp4',
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

thumbnail = self._search_regex(
r'data-poster="([^"]+)"', webpage, 'data poster', fatal=False) or \
self._html_search_meta(('og:image', 'twitter:image'), webpage)

headers = {
'Token': self._search_regex(r'data-token="([^"]+)"', webpage, 'data token'),
'Token-Expiry': self._search_regex(r'data-expiry="([^"]+)"', webpage, 'data expiry'),
'Uvid': video_id,
}
show_stream = self._download_json(
f'https://v2-streams-elb.simplestreamcdn.com/api/show/stream/{video_id}', video_id,
query={
'key': self._search_regex(r'data-key="([^"]+)"', webpage, 'data key'),
'platform': 'chrome',
}, headers=headers)
# TODO: show_stream['stream'] and show_stream['drm'] may contain something interesting
streams = self._download_json(
traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
headers=headers)
formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
self._sort_formats(formats)

return {
'id': video_id,
'title': self._html_search_meta(('og:title', 'twitter:title'), webpage),
'description': self._html_search_meta(('og:description', 'twitter:description', 'description'), webpage),
'formats': formats,
'subtitles': subs,
'thumbnails': [{
'id': 'unscaled',
'url': thumbnail.split('?')[0],
'ext': 'jpg',
'preference': 2,
}, {
'id': 'orig',
'url': thumbnail,
'ext': 'jpg',
'preference': 1,
}] if thumbnail else None,
}


class IslamChannelSeriesIE(InfoExtractor):
_VALID_URL = r'https?://watch\.islamchannel\.tv/series/(?P<id>[a-f\d-]+)'
_TESTS = [{
'url': 'https://watch.islamchannel.tv/series/a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
'info_dict': {
'id': 'a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
},
'playlist_mincount': 31,
}]

def _real_extract(self, url):
pl_id = self._match_id(url)
webpage = self._download_webpage(url, pl_id)

return self.playlist_from_matches(
re.finditer(r'<a\s+href="(/watch/\d+)"[^>]+?data-video-type="show">', webpage),
pl_id, getter=lambda x: urljoin(url, x.group(1)), ie=IslamChannelIE)
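The IslamChannel extractor above chains three requests: scrape token material from `data-*` attributes, call the simplestream show endpoint with those headers, then follow the returned tokenization URL for the actual m3u8. A compact sketch of just the attribute-scraping step (markup invented):

    import re

    # invented player markup carrying auth material as data attributes
    webpage = '<div data-token="tok" data-expiry="9999999999" data-key="k1">'

    def attr(name):
        return re.search(r'data-%s="([^"]+)"' % name, webpage).group(1)

    headers = {'Token': attr('token'), 'Token-Expiry': attr('expiry'), 'Uvid': '38604310'}
    query = {'key': attr('key'), 'platform': 'chrome'}
    print(headers, query)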
yt_dlp/extractor/israelnationalnews.py (new file, 50 lines)
@@ -0,0 +1,50 @@
from .common import InfoExtractor
from ..utils import ExtractorError, traverse_obj


class IsraelNationalNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?israelnationalnews\.com/news/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.israelnationalnews.com/news/354520',
'info_dict': {
'id': '354520'
},
'playlist': [{
'info_dict': {
'id': 'jA84wQhVvg8',
'title': 'Even CNN Host Is Shocked by How Bad Biden\'s Approval Ratings Have Gotten | DM CLIPS | Rubin Report',
'ext': 'mp4',
'description': 'md5:b7325a3d00c7596337dc3ae37e32d35c',
'channel': 'The Rubin Report',
'channel_follower_count': int,
'comment_count': int,
'categories': ['News & Politics'],
'like_count': int,
'uploader_url': 'http://www.youtube.com/user/RubinReport',
'uploader_id': 'RubinReport',
'availability': 'public',
'view_count': int,
'duration': 240,
'thumbnail': 'https://i.ytimg.com/vi_webp/jA84wQhVvg8/maxresdefault.webp',
'live_status': 'not_live',
'playable_in_embed': True,
'age_limit': 0,
'tags': 'count:29',
'channel_id': 'UCJdKr0Bgd_5saZYqLCa9mng',
'channel_url': 'https://www.youtube.com/channel/UCJdKr0Bgd_5saZYqLCa9mng',
'upload_date': '20220606',
'uploader': 'The Rubin Report',
}
}]
}]

def _real_extract(self, url):
news_article_id = self._match_id(url)
article_json = self._download_json(
f'https://www.israelnationalnews.com/Generic/NewAPI/Item?type=0&Item={news_article_id}', news_article_id)

urls = traverse_obj(article_json, ('Content2', ..., 'content', ..., 'attrs', 'src'))
if not urls:
raise ExtractorError('This article does not have any videos', expected=True)

return self.playlist_from_matches(urls, news_article_id, ie='Youtube')
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]

_WEBPAGE_TESTS = [{
# JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20160719',
'timestamp': 1468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
'description': '',
'duration': 294.0,
},
}, {
# Player url not surrounded by quotes
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
'info_dict': {
'id': 'R10NQdhY',
'title': 'Playgirl',
'ext': 'mp4',
'upload_date': '20220624',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
'timestamp': 1656064800,
'description': 'BRD 1966, Will Tremper',
'duration': 5146.0,
},
'params': {'allowed_extractors': ['generic', 'jwplatform']},
}]

@classmethod
def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, dont look for <input>
ret = re.findall(
r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret
@@ -57,7 +57,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)

json_data = self._search_json(
r'window.kmklabs.gtm\s*=\s*', webpage, 'json_data', display_id)
r'window.kmklabs.gtm\s*=', webpage, 'json_data', display_id)
video_id = json_data['videos']['video_1']['video_id']

return self.url_result(
@@ -14,7 +14,7 @@ class MallTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
'md5': '1c4a37f080e1f3023103a7b43458e518',
'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
'info_dict': {
'id': 't0zzt0',
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
@@ -25,6 +25,11 @@ class MallTVIE(InfoExtractor):
'timestamp': 1538870400,
'upload_date': '20181007',
'view_count': int,
'comment_count': int,
'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
'average_rating': 9.060869565217391,
'dislike_count': int,
'like_count': int,
}
}, {
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
@@ -32,6 +37,24 @@ class MallTVIE(InfoExtractor):
}, {
'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
'only_matching': True,
}, {
'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
'info_dict': {
'id': 'yx010y',
'ext': 'mp4',
'dislike_count': int,
'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
'comment_count': int,
'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
'like_count': int,
'duration': 752,
'timestamp': 1646956800,
'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
'view_count': int,
'upload_date': '20220311',
'average_rating': 9.685714285714285,
}
}]

def _real_extract(self, url):
@@ -43,12 +66,12 @@ def _real_extract(self, url):
video = self._parse_json(self._search_regex(
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
webpage, 'video object'), display_id)
video_source = video['VideoSource']

video_id = self._search_regex(
r'/([\da-z]+)/index\b', video_source, 'video id')
r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')

formats = self._extract_m3u8_formats(
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
video['VideoSource'], video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)

subtitles = {}
@@ -69,7 +92,7 @@ def get_count(k):
info = self._search_json_ld(webpage, video_id, default={})

return merge_dicts({
'id': video_id,
'id': str(video_id),
'display_id': display_id,
'title': video.get('Title'),
'description': clean_html(video.get('Description')),
@@ -8,15 +8,33 @@
float_or_none,
int_or_none,
str_or_none,
try_get,
traverse_obj,
)


class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
'info_dict': {
'id': 'jTBFnLKdLy15K',
'ext': 'mp4',
'title': "Mornu's clutch",
'description': '',
'uploader': 'Aciel',
'timestamp': 1651628243,
'upload_date': '20220504',
'uploader_id': '19335460',
'uploader_url': 'https://medal.tv/users/19335460',
'comment_count': int,
'view_count': int,
'like_count': int,
'duration': 13,
}
}, {
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
'md5': '3d19d426fe0b2d91c26e412684e66a06',
'info_dict': {
'id': '2mA60jWAGQCBH',
'ext': 'mp4',
@@ -26,9 +44,15 @@ class MedalTVIE(InfoExtractor):
'timestamp': 1603165266,
'upload_date': '20201020',
'uploader_id': '10619174',
'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
'uploader_url': 'https://medal.tv/users/10619174',
'comment_count': int,
'view_count': int,
'like_count': int,
'duration': 23,
}
}, {
'url': 'https://medal.tv/clips/2um24TWdty0NA',
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '2um24TWdty0NA',
@@ -39,25 +63,42 @@ class MedalTVIE(InfoExtractor):
'timestamp': 1605580939,
'upload_date': '20201117',
'uploader_id': '5156321',
'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
'uploader_url': 'https://medal.tv/users/5156321',
'comment_count': int,
'view_count': int,
'like_count': int,
'duration': 9,
}
}, {
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
'only_matching': True,
}, {
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)
path = self._match_valid_url(url).group('path')

webpage = self._download_webpage(url, video_id)

hydration_data = self._parse_json(self._search_regex(
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
webpage, 'hydration data', default='{}'), video_id)
next_data = self._search_json(
'<script[^>]*__NEXT_DATA__[^>]*>', webpage,
'next data', video_id, end_pattern='</script>', fatal=False)

clip = try_get(
hydration_data, lambda x: x['clips'][video_id], dict) or {}
build_id = next_data.get('buildId')
if not build_id:
raise ExtractorError(
'Could not find build ID.', video_id=video_id)

locale = next_data.get('locale', 'en')

api_response = self._download_json(
f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)

clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
if not clip:
raise ExtractorError(
'Could not find video information.', video_id=video_id)
@@ -113,9 +154,8 @@ def add_item(container, item_url, height, id_key='format_id', item_id=None):

# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
author = try_get(
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
author_id = str_or_none(author.get('id'))
author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
author_id = str_or_none(author.get('userId'))
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')

return {
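The MedalTV rewrite above drops the `hydrationData` blob for standard Next.js plumbing: read `buildId` from the `__NEXT_DATA__` script tag, then request the page's JSON twin from the `/_next/data/` route. A hedged stdlib-only sketch of that two-step pattern (tag contents and IDs invented):

    import json
    import re

    # hypothetical __NEXT_DATA__ tag as it would appear in the page HTML
    webpage = ('<script id="__NEXT_DATA__" type="application/json">'
               '{"buildId": "abc123", "locale": "en"}</script>')

    next_data = json.loads(re.search(
        r'<script[^>]*__NEXT_DATA__[^>]*>(.*?)</script>', webpage).group(1))
    build_id, locale = next_data['buildId'], next_data.get('locale', 'en')

    path, video_id = 'games/valorant/clips', 'jTBFnLKdLy15K'
    print(f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json')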
@@ -172,31 +172,27 @@ class MediasetIE(ThePlatformBaseIE):
}]

def _extract_from_webpage(self, url, webpage):
def _qs(url):
return parse_qs(url)

def _program_guid(qs):
return qs.get('programGuid', [None])[0]

entries = []
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
webpage):
embed_url = mobj.group('url')
embed_qs = _qs(embed_url)
embed_qs = parse_qs(embed_url)
program_guid = _program_guid(embed_qs)
if program_guid:
entries.append(embed_url)
yield self.url_result(embed_url)
continue

video_id = embed_qs.get('id', [None])[0]
if not video_id:
continue
urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect')
embed_url = urlh.geturl()
program_guid = _program_guid(_qs(embed_url))
program_guid = _program_guid(parse_qs(embed_url))
if program_guid:
entries.append(embed_url)
return entries
yield self.url_result(embed_url)

def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
for video in smil.findall(self._xpath_ns('.//video', namespace)):
yt_dlp/extractor/mediaworksnz.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import re

from .common import InfoExtractor
from ..utils import (
bug_reports_message,
float_or_none,
traverse_obj,
unified_timestamp,
)


class MediaWorksNZVODIE(InfoExtractor):
_VALID_URL_BASE_RE = r'https?://vodupload-api\.mediaworks\.nz/library/asset/published/'
_VALID_URL_ID_RE = r'(?P<id>[A-Za-z0-9-]+)'
_VALID_URL = rf'{_VALID_URL_BASE_RE}{_VALID_URL_ID_RE}'
_TESTS = [{
'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID00359',
'info_dict': {
'id': 'VID00359',
'ext': 'mp4',
'title': 'GRG Jacinda Ardern safe drug testing 1920x1080',
'description': 'md5:d4d7dc366742e86d8130b257dcb520ba',
'duration': 142.76,
'timestamp': 1604268608,
'upload_date': '20201101',
'thumbnail': r're:^https?://.*\.jpg$',
'channel': 'George FM'
}
}, {
# has audio-only format
'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID02627',
'info_dict': {
'id': 'VID02627',
'ext': 'mp3',
'title': 'Tova O\'Brien meets Ukraine President Volodymyr Zelensky',
'channel': 'Today FM',
'description': 'Watch in full the much anticipated interview of Volodymyr Zelensky',
'duration': 2061.16,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20220822',
'timestamp': 1661152289,
},
'params': {'format': 'ba[ext=mp3]'}
}]

_WEBPAGE_TESTS = [{
'url': 'https://www.rova.nz/home/podcasts/socrates-walks-into-a-bar/the-trolley-problem---episode-1.html',
'info_dict': {
'id': 'VID02494',
'ext': 'mp4',
'title': 'The Trolley Problem',
'duration': 2843.56,
'channel': 'Other',
'timestamp': 1658356489,
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Socrates Walks Into A Bar Podcast Episode 1',
'upload_date': '20220720',
}
}]

@classmethod
def _extract_embed_urls(cls, url, webpage):
for mobj in re.finditer(
rf'''(?x)<div\s+\bid=["']Player-Attributes-JWID[^>]+\b
data-request-url=["']{cls._VALID_URL_BASE_RE}["'][^>]+\b
data-asset-id=["']{cls._VALID_URL_ID_RE}["']''', webpage
):
yield f'https://vodupload-api.mediaworks.nz/library/asset/published/{mobj.group("id")}'

def _real_extract(self, url):
video_id = self._match_id(url)
asset = self._download_json(url, video_id)['asset']

if asset.get('drm') not in ('NonDRM', None):
self.report_drm(video_id)

content_type = asset.get('type')
if content_type and content_type != 'video':
self.report_warning(f'Unknown content type: {content_type}' + bug_reports_message(), video_id)

formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['streamingUrl'], video_id)

audio_streaming_url = traverse_obj(
asset, 'palyoutPathAudio', 'playoutpathaudio', expected_type=str)
if audio_streaming_url:
audio_formats = self._extract_m3u8_formats(audio_streaming_url, video_id, fatal=False, ext='mp3')
for audio_format in audio_formats:
# all the audio streams appear to be aac
audio_format.setdefault('vcodec', 'none')
audio_format.setdefault('acodec', 'aac')
formats.append(audio_format)

self._sort_formats(formats)

return {
'id': video_id,
'title': asset.get('title'),
'description': asset.get('description'),
'duration': float_or_none(asset.get('duration')),
'timestamp': unified_timestamp(asset.get('dateadded')),
'channel': asset.get('brand'),
'thumbnails': [{'url': thumbnail_url} for thumbnail_url in asset.get('thumbnails') or []],
'formats': formats,
'subtitles': subtitles,
}
yt_dlp/extractor/microsoftembed.py (new file, 66 lines)
@@ -0,0 +1,66 @@
from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj, unified_timestamp


class MicrosoftEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?microsoft\.com/(?:[^/]+/)?videoplayer/embed/(?P<id>[a-z0-9A-Z]+)'

_TESTS = [{
'url': 'https://www.microsoft.com/en-us/videoplayer/embed/RWL07e',
'md5': 'eb0ae9007f9b305f9acd0a03e74cb1a9',
'info_dict': {
'id': 'RWL07e',
'title': 'Microsoft for Public Health and Social Services',
'ext': 'mp4',
'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5',
'age_limit': 0,
'timestamp': 1631658316,
'upload_date': '20210914'
}
}]
_API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'

def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(self._API_URL + video_id, video_id)

formats = []
for source_type, source in metadata['streams'].items():
if source_type == 'smooth_Streaming':
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
elif source_type == 'apple_HTTP_Live_Streaming':
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
elif source_type == 'mPEG_DASH':
formats.extend(self._extract_mpd_formats(source['url'], video_id))
else:
formats.append({
'format_id': source_type,
'url': source['url'],
'height': source.get('heightPixels'),
'width': source.get('widthPixels'),
})
self._sort_formats(formats)

subtitles = {
lang: [{
'url': data.get('url'),
'ext': 'vtt',
}] for lang, data in traverse_obj(metadata, 'captions', default={}).items()
}

thumbnails = [{
'url': thumb.get('url'),
'width': thumb.get('width') or None,
'height': thumb.get('height') or None,
} for thumb in traverse_obj(metadata, ('snippet', 'thumbnails', ...))]
self._remove_duplicate_formats(thumbnails)

return {
'id': video_id,
'title': traverse_obj(metadata, ('snippet', 'title')),
'timestamp': unified_timestamp(traverse_obj(metadata, ('snippet', 'activeStartDate'))),
'age_limit': int_or_none(traverse_obj(metadata, ('snippet', 'minimumAge'))) or 0,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
}
@@ -159,6 +159,7 @@ def _real_extract(self, url):
formats.append({
'format_id': 'http',
'url': decrypted,
'vcodec': 'none',
'downloader_options': {
# Mixcloud starts throttling at >~5M
'http_chunk_size': 5242880,
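On the Mixcloud hunk above: 5242880 bytes is 5 MiB, kept just under the point where single large requests reportedly get throttled. A loose stdlib sketch of the idea behind `http_chunk_size`, fetching a resource in Range-limited pieces (placeholder URL; assumes the server honors Range requests):

    import urllib.request

    CHUNK = 5242880  # 5 MiB per request, below the observed throttle threshold
    url = 'https://example.com/audio.m4a'  # placeholder

    data, offset = b'', 0
    while True:
        req = urllib.request.Request(
            url, headers={'Range': f'bytes={offset}-{offset + CHUNK - 1}'})
        with urllib.request.urlopen(req) as resp:
            piece = resp.read()
        data += piece
        offset += len(piece)
        if len(piece) < CHUNK:  # short read means we hit end of file
            break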
@@ -343,6 +343,7 @@ def _real_extract(self, url):
return {
'id': video_id,
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
'formats': formats,
'subtitles': subtitles,
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
@@ -31,8 +31,13 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)

iframe_path = self._html_search_regex(
r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
'iframe path')
r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage, 'iframe path', default=None)

if iframe_path is None:
iframe_path = self._html_search_regex(
r'<iframe [^>]*\bsrc="(https://motorsport\.tv/embed/[^"]+)', webpage, 'embed iframe path')
return self.url_result(iframe_path)

iframe = self._download_webpage(
compat_urlparse.urljoin(url, iframe_path), display_id,
'Downloading iframe')
@@ -1,6 +1,10 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import try_get
from ..utils import (
int_or_none,
traverse_obj,
try_get,
)


class MxplayerIE(InfoExtractor):
@@ -9,6 +13,7 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/show/watch-my-girlfriend-is-an-alien-hindi-dubbed/season-1/episode-1-online-9d2013d31d5835bb8400e3b3c5e7bb72',
'info_dict': {
'id': '9d2013d31d5835bb8400e3b3c5e7bb72',
'display_id': 'episode-1-online',
'ext': 'mp4',
'title': 'Episode 1',
'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
@@ -17,7 +22,6 @@ class MxplayerIE(InfoExtractor):
'duration': 2451,
'season': 'Season 1',
'series': 'My Girlfriend Is An Alien (Hindi Dubbed)',
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/9d2013d31d5835bb8400e3b3c5e7bb72/en/16x9/320x180/9562f5f8df42cad09c9a9c4e69eb1567_1920x1080.webp',
'episode': 'Episode 1'
},
'params': {
@@ -28,21 +32,17 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
'info_dict': {
'id': 'b9fa28df3bfb8758874735bbd7d2655a',
'display_id': 'episode-1-online',
'ext': 'mp4',
'title': 'Knock Knock (Hindi Dubbed)',
'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b',
'season_number': 0,
'episode_number': 0,
'description': 'md5:4160f2dfc3b87c524261366f6b736329',
'duration': 5970,
'season': 'Season 0',
'series': None,
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/b9fa28df3bfb8758874735bbd7d2655a/en/16x9/320x180/test_pic1588676032011.webp',
'episode': 'Episode 0'
},
'params': {
'format': 'bv',
'skip_download': True,
},
'skip': 'No longer available',
}, {
'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
'info_dict': {
@@ -55,26 +55,26 @@ class MxplayerIE(InfoExtractor):
'duration': 2332,
'season': 'Season 1',
'series': 'Shaitaan',
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/45055d5bcff169ad48f2ad7552a83d6c/en/16x9/320x180/voot_8e7d5f8d8183340869279c732c1e3a43.webp',
'episode': 'Episode 1'
},
'params': {
'format': 'best',
'skip_download': True,
},
'skip': 'No longer available.'
}, {
'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
'info_dict': {
'id': 'd445579792b0135598ba1bc9088a84cb',
'display_id': 'duh-swapna-online',
'ext': 'mp4',
'title': 'Duh Swapna',
'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
'season_number': 1,
'episode_number': 3,
'duration': 2568,
'season': 'Chapter 1',
'season': 'Season 1',
'series': 'Aashram',
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/d445579792b0135598ba1bc9088a84cb/en/4x3/1600x1200/test_pic1624819307993.webp',
'episode': 'Episode 3'
},
'params': {
@@ -85,6 +85,7 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/show/watch-dangerous/season-1/chapter-1-online-5a351b4f9fb69436f6bd6ae3a1a75292',
'info_dict': {
'id': '5a351b4f9fb69436f6bd6ae3a1a75292',
'display_id': 'chapter-1-online',
'ext': 'mp4',
'title': 'Chapter 1',
'description': 'md5:233886b8598bc91648ac098abe1d288f',
@@ -93,7 +94,6 @@ class MxplayerIE(InfoExtractor):
'duration': 1305,
'season': 'Season 1',
'series': 'Dangerous',
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/5a351b4f9fb69436f6bd6ae3a1a75292/en/4x3/1600x1200/test_pic1624706302350.webp',
'episode': 'Episode 1'
},
'params': {
@@ -107,72 +107,79 @@ class MxplayerIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Attacks of 26/11',
'description': 'md5:689bacd29e97b3f31eaf519eb14127e5',
'season_number': 0,
'episode_number': 0,
'duration': 6085,
'season': 'Season 0',
'series': None,
'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/0452f0d80226c398d63ce7e3ea40fa2d/en/16x9/320x180/00c8955dab5e5d340dbde643f9b1f6fd_1920x1080.webp',
'episode': 'Episode 0'
},
'params': {
'format': 'best',
'skip_download': True,
},
'skip': 'No longer available. Cannot be played on browser'
}, {
'url': 'https://www.mxplayer.in/movie/watch-kitne-door-kitne-paas-movie-online-a9e9c76c566205955f70d8b2cb88a6a2',
'info_dict': {
'id': 'a9e9c76c566205955f70d8b2cb88a6a2',
'display_id': 'watch-kitne-door-kitne-paas-movie-online',
'title': 'Kitne Door Kitne Paas',
'duration': 8458,
'ext': 'mp4',
'description': 'md5:fb825f3c542513088024dcafef0921b4',
},
'params': {
'format': 'bv',
'skip_download': True,
},
}, {
'url': 'https://www.mxplayer.in/show/watch-ek-thi-begum-hindi/season-2/game-of-power-online-5e5305c28f1409847cdc4520b6ad77cf',
'info_dict': {
'id': '5e5305c28f1409847cdc4520b6ad77cf',
'display_id': 'game-of-power-online',
'title': 'Game Of Power',
'duration': 1845,
'ext': 'mp4',
'description': 'md5:1d0948d2a5312d7013792d53542407f9',
'series': 'Ek Thi Begum (Hindi)',
'season': 'Season 2',
'season_number': 2,
'episode': 'Episode 2',
'episode_number': 2,
},
'params': {
'format': 'bv',
'skip_download': True,
},
}]

def _real_extract(self, url):
type, display_id, video_id = self._match_valid_url(url).groups()
type = 'movie_film' if type == 'movie' else 'tvshow_episode'
API_URL = 'https://androidapi.mxplay.com/v1/detail/'
headers = {
'X-Av-Code': '23',
'X-Country': 'IN',
'X-Platform': 'android',
'X-App-Version': '1370001318',
'X-Resolution': '3840x2160',
}
data_json = self._download_json(f'{API_URL}{type}/{video_id}', display_id, headers=headers)['profile']
video_type, display_id, video_id = self._match_valid_url(url).group('type', 'display_id', 'id')
if 'show' in video_type:
video_type = 'episode'

season, series = None, None
for dct in data_json.get('levelInfos', []):
if dct.get('type') == 'tvshow_season':
season = dct.get('name')
elif dct.get('type') == 'tvshow_show':
series = dct.get('name')
thumbnails = []
for thumb in data_json.get('poster', []):
thumbnails.append({
'url': thumb.get('url'),
'width': thumb.get('width'),
'height': thumb.get('height'),
})
data_json = self._download_json(
f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)

formats = []
subtitles = {}
for dct in data_json.get('playInfo', []):
if dct.get('extension') == 'mpd':
frmt, subs = self._extract_mpd_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
formats.extend(frmt)
subtitles = self._merge_subtitles(subtitles, subs)
elif dct.get('extension') == 'm3u8':
frmt, subs = self._extract_m3u8_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
formats.extend(frmt)
subtitles = self._merge_subtitles(subtitles, subs)
streams = traverse_obj(data_json, ('stream', {'m3u8': ('hls', 'high'), 'mpd': ('dash', 'high')}))
formats, dash_subs = self._extract_mpd_formats_and_subtitles(
f'https://llvod.mxplay.com/{streams["mpd"]}', display_id, fatal=False)
hls_frmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
f'https://llvod.mxplay.com/{streams["m3u8"]}', display_id, fatal=False)

formats.extend(hls_frmts)
self._sort_formats(formats)

season = traverse_obj(data_json, ('container', 'title'))
return {
'id': video_id,
'display_id': display_id,
'title': data_json.get('name') or display_id,
'description': data_json.get('description'),
'season_number': data_json.get('seasonNum'),
'episode_number': data_json.get('episodeNum'),
'duration': data_json.get('duration'),
'season': season,
'series': series,
'thumbnails': thumbnails,
'title': data_json.get('title'),
'formats': formats,
'subtitles': subtitles,
'subtitles': self._merge_subtitles(dash_subs, hls_subs),
'display_id': display_id,
'duration': data_json.get('duration'),
'series': traverse_obj(data_json, ('container', 'container', 'title')),
'description': data_json.get('description'),
'season': season,
'season_number': int_or_none(
self._search_regex(r'Season (\d+)', season, 'Season Number', default=None)),
'episode_number': data_json.get('sequence') or None,
}
@@ -7,14 +7,20 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
RegexNotFoundError,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@@ -584,3 +590,169 @@ def _real_extract(self, url):
             'formats': formats,
             'is_live': is_live,
         }


 class NBCStationsIE(InfoExtractor):
     _DOMAIN_RE = '|'.join(map(re.escape, (
         'nbcbayarea', 'nbcboston', 'nbcchicago', 'nbcconnecticut', 'nbcdfw', 'nbclosangeles',
         'nbcmiami', 'nbcnewyork', 'nbcphiladelphia', 'nbcsandiego', 'nbcwashington',
         'necn', 'telemundo52', 'telemundoarizona', 'telemundochicago', 'telemundonuevainglaterra',
     )))
     _VALID_URL = rf'https?://(?:www\.)?(?P<site>{_DOMAIN_RE})\.com/(?:[^/?#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'

     _TESTS = [{
         'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
         'md5': '462041d91bd762ef5a38b7d85d6dc18f',
         'info_dict': {
             'id': '2968618',
             'ext': 'mp4',
             'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
             'description': None,
             'timestamp': 1661135892,
             'upload_date': '20220821',
             'uploader': 'NBC 4',
             'uploader_id': 'KNBC',
             'channel': 'nbclosangeles',
         },
     }, {
         'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
         'md5': '0917dcf7885be1023a9220630d415f67',
         'info_dict': {
             'id': '2247002',
             'ext': 'mp4',
             'title': 'Huracán complica que televidente de Tucson reciba reembolso',
             'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
             'timestamp': 1660886507,
             'upload_date': '20220819',
             'uploader': 'Telemundo Arizona',
             'uploader_id': 'KTAZ',
             'channel': 'telemundoarizona',
         },
     }]

     _RESOLUTIONS = {
         '1080': '1920',
         '720': '1280',
         '540': '960',
         '360': '640',
         '234': '416',
     }

     def _real_extract(self, url):
         channel, video_id = self._match_valid_url(url).group('site', 'id')
         webpage = self._download_webpage(url, video_id)

         nbc_data = self._search_json(
             r'<script>var\s*nbc\s*=', webpage, 'NBC JSON data', video_id)
         pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC'
         fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID'))
         fw_network_id = traverse_obj(nbc_data, ('video', 'fwNetworkID'), default='382114')

         video_data = self._parse_json(self._html_search_regex(
             r'data-videos="([^"]*)"', webpage, 'video data', default='{}'), video_id)
         video_data = variadic(video_data)[0]
         video_data.update(self._parse_json(self._html_search_regex(
             r'data-meta="([^"]*)"', webpage, 'metadata', default='{}'), video_id))

         formats = []

         if video_data.get('mpx_is_livestream') == '1':
             live = True
             player_id = traverse_obj(
                 video_data, 'mpx_m3upid', ('video', 'meta', 'mpx_m3upid'), 'mpx_pid',
                 ('video', 'meta', 'mpx_pid'), 'pid_streaming_web_medium')
             query = {
                 'mbr': 'true',
                 'assetTypes': 'LegacyRelease',
                 'fwsitesection': fw_ssid,
                 'fwNetworkID': fw_network_id,
                 'pprofile': 'ots_desktop_html',
                 'sensitive': 'false',
                 'w': '1920',
                 'h': '1080',
                 'rnd': '1660303',
                 'mode': 'LIVE',
                 'format': 'SMIL',
                 'tracking': 'true',
                 'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
                 'vpaid': 'script',
                 'schema': '2.0',
                 'SDK': 'PDK+6.1.3',
             }
             info = {
                 'title': f'{channel} livestream',
             }

         else:
             live = False
             player_id = traverse_obj(
                 video_data, ('video', 'meta', 'pid_streaming_web_high'), 'pid_streaming_web_high',
                 ('video', 'meta', 'mpx_pid'), 'mpx_pid')

             date_string = traverse_obj(video_data, 'date_string', 'date_gmt')
             if date_string:
                 date_string = self._search_regex(
                     r'datetime="([^"]+)"', date_string, 'date string', fatal=False)
             else:
                 date_string = traverse_obj(
                     nbc_data, ('dataLayer', 'adobe', 'prop70'), ('dataLayer', 'adobe', 'eVar70'),
                     ('dataLayer', 'adobe', 'eVar59'))

             video_url = traverse_obj(video_data, ('video', 'meta', 'mp4_url'), 'mp4_url')
             if video_url:
                 height = url_basename(video_url).split('-')[1].split('p')[0]
                 formats.append({
                     'url': video_url,
                     'ext': 'mp4',
                     'width': int_or_none(self._RESOLUTIONS.get(height)),
                     'height': int_or_none(height),
                     'format_id': f'http-{height}',
                 })

             query = {
                 'mbr': 'true',
                 'assetTypes': 'LegacyRelease',
                 'fwsitesection': fw_ssid,
                 'fwNetworkID': fw_network_id,
                 'format': 'redirect',
                 'manifest': 'm3u',
                 'Tracking': 'true',
                 'Embedded': 'true',
                 'formats': 'MPEG4',
             }
             info = {
                 'title': video_data.get('title') or traverse_obj(
                     nbc_data, ('dataLayer', 'contenttitle'), ('dataLayer', 'title'),
                     ('dataLayer', 'adobe', 'prop22'), ('dataLayer', 'id')),
                 'description': traverse_obj(video_data, 'summary', 'excerpt', 'video_hero_text'),
                 'upload_date': str_or_none(unified_strdate(date_string)),
                 'timestamp': int_or_none(unified_timestamp(date_string)),
             }

         if not player_id:
             raise ExtractorError(
                 'No video player ID or livestream player ID found in webpage', expected=True)

         headers = {'Origin': f'https://www.{channel}.com'}
         manifest, urlh = self._download_webpage_handle(
             f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
             headers=headers, query=query, note='Downloading manifest')
         if live:
             manifest_url = self._search_regex(r'<video src="([^"]*)', manifest, 'manifest URL')
         else:
             manifest_url = urlh.geturl()

         formats.extend(self._extract_m3u8_formats(
             manifest_url, video_id, 'mp4', headers=headers, m3u8_id='hls',
             fatal=live, live=live, errnote='No HLS formats found'))
         self._sort_formats(formats)

         return {
             'id': str_or_none(video_id),
             'channel': channel,
             'uploader': str_or_none(nbc_data.get('on_air_name')),
             'uploader_id': str_or_none(nbc_data.get('callLetters')),
             'formats': formats,
             'is_live': live,
             **info,
         }
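Illustration (not part of the diff): for VOD pages, _real_extract above recovers the height from the mp4 file name and pairs it with a width from _RESOLUTIONS, a 16:9 height-to-width table. A sketch with a hypothetical URL in the shape the code expects (height as the second dash-separated token):

    from yt_dlp.utils import url_basename

    _RESOLUTIONS = {'1080': '1920', '720': '1280', '540': '960', '360': '640', '234': '416'}
    video_url = 'https://media.example.com/videos/clip-720p-hi.mp4'  # hypothetical file name
    height = url_basename(video_url).split('-')[1].split('p')[0]     # 'clip-720p-hi.mp4' -> '720'
    assert height == '720' and _RESOLUTIONS[height] == '1280'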
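Similarly, upload_date and timestamp both come from one datetime string fed to two helpers. Assuming a datetime attribute like the value below (chosen to be consistent with the first test's 1661135892 timestamp; the exact page value is an assumption):

    from yt_dlp.utils import unified_strdate, unified_timestamp

    date_string = '2022-08-21T19:38:12-07:00'  # assumed datetime="..." value
    assert unified_strdate(date_string) == '20220821'     # local calendar date
    assert unified_timestamp(date_string) == 1661135892   # epoch seconds in UTC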
Some files were not shown because too many files have changed in this diff