mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-17 06:35:42 +07:00
Compare commits
79 Commits
2021.03.03
...
2021.03.24
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2f0b0c672 | ||
|
|
b704fc1a68 | ||
|
|
a3affbe6a0 | ||
|
|
1418a0437f | ||
|
|
143db31d48 | ||
|
|
3700c7ef10 | ||
|
|
498f560638 | ||
|
|
394dcd4486 | ||
|
|
83b20a970d | ||
|
|
e1feb88fdf | ||
|
|
389b9dbbcc | ||
|
|
a7f347d9c9 | ||
|
|
421a459573 | ||
|
|
c224251aad | ||
|
|
037cc66ec8 | ||
|
|
9160a0c6a2 | ||
|
|
5c5fae6d2f | ||
|
|
c1d3a4a8f0 | ||
|
|
adc74b3c6d | ||
|
|
beb4b92a66 | ||
|
|
cd9b384cc3 | ||
|
|
4d971a16b8 | ||
|
|
3561530776 | ||
|
|
4690688658 | ||
|
|
fe845284c4 | ||
|
|
2b3bf01c90 | ||
|
|
23c1a66730 | ||
|
|
dd18a58cb1 | ||
|
|
a94bfd6cfe | ||
|
|
a515a78dd3 | ||
|
|
e167860ce7 | ||
|
|
75d43ca080 | ||
|
|
5226731e2d | ||
|
|
dcf64d43e0 | ||
|
|
e3c076970e | ||
|
|
7978e172f3 | ||
|
|
605d299f83 | ||
|
|
18c1f04362 | ||
|
|
e4beae703d | ||
|
|
d034ab669c | ||
|
|
5aeefbd633 | ||
|
|
597c18665e | ||
|
|
10db0d2f57 | ||
|
|
7275535116 | ||
|
|
a1c5d2ca64 | ||
|
|
ca87974543 | ||
|
|
e92caff5d5 | ||
|
|
ea3a012d2a | ||
|
|
5b8917fb52 | ||
|
|
8eec0120a2 | ||
|
|
4cf1e5d2f9 | ||
|
|
0a473f2f0f | ||
|
|
e4edeb6226 | ||
|
|
d488e254d9 | ||
|
|
d7009caa03 | ||
|
|
54759df586 | ||
|
|
605b684c2d | ||
|
|
994443d24d | ||
|
|
c5640c4508 | ||
|
|
1f52a09e2e | ||
|
|
fc21af505c | ||
|
|
015f3b3120 | ||
|
|
5ba4a0b69c | ||
|
|
0852947fcc | ||
|
|
99594a11ce | ||
|
|
2be71994c0 | ||
|
|
26fe8ffed0 | ||
|
|
feee67ae88 | ||
|
|
1caaf92d47 | ||
|
|
d069eca7a3 | ||
|
|
f3eaa8dd1c | ||
|
|
9e631877f8 | ||
|
|
36147a63e3 | ||
|
|
57db6a87ef | ||
|
|
cd7c66cf01 | ||
|
|
2c736b4f61 | ||
|
|
c4a508ab31 | ||
|
|
7815e55572 | ||
|
|
162e6f0000 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -21,7 +21,7 @@ ## Checklist
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.01. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ ## Checklist
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.01**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -44,7 +44,7 @@ ## Verbose log
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.01
|
||||
[debug] yt-dlp version 2021.03.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -21,7 +21,7 @@ ## Checklist
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.01. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ ## Checklist
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.01**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -21,13 +21,13 @@ ## Checklist
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.01. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.01**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -21,7 +21,7 @@ ## Checklist
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.01. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -30,7 +30,7 @@ ## Checklist
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.01**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -46,7 +46,7 @@ ## Verbose log
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.01
|
||||
[debug] yt-dlp version 2021.03.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -21,13 +21,13 @@ ## Checklist
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.01. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.01**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -60,6 +60,7 @@ yt-dlp.zip
|
||||
*.mkv
|
||||
*.swf
|
||||
*.part
|
||||
*.part-*
|
||||
*.ytdl
|
||||
*.dump
|
||||
*.frag
|
||||
|
||||
11
CONTRIBUTORS
11
CONTRIBUTORS
@@ -1,5 +1,6 @@
|
||||
pukkandan (owner)
|
||||
shirt-dev (collaborator)
|
||||
colethedj (collaborator)
|
||||
h-h-h-h
|
||||
pauldubois98
|
||||
nixxo
|
||||
@@ -21,10 +22,16 @@ nao20010128nao
|
||||
kurumigi
|
||||
tsukumi
|
||||
bbepis
|
||||
animelover1984
|
||||
Pccode66
|
||||
Ashish
|
||||
Ashish0804
|
||||
RobinD42
|
||||
hseg
|
||||
colethedj
|
||||
DennyDai
|
||||
codeasashu
|
||||
teesid
|
||||
kevinoconnor7
|
||||
damianoamatruda
|
||||
2ShedsJackson
|
||||
CXwudi
|
||||
xtkoba
|
||||
|
||||
155
Changelog.md
155
Changelog.md
@@ -17,6 +17,88 @@ # Instuctions for creating release
|
||||
-->
|
||||
|
||||
|
||||
### 2021.03.24.1
|
||||
* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
|
||||
|
||||
|
||||
### 2021.03.24
|
||||
* Merge youtube-dl: Upto [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
|
||||
* Parse metadata from multiple fields using `--parse-metadata`
|
||||
* Ability to load playlist infojson using `--load-info-json`
|
||||
* Write current epoch to infojson when using `--no-clean-infojson`
|
||||
* [youtube_live_chat] fix bug when trying to set cookies
|
||||
* [niconico] Fix for when logged in by: @CXwudi and @xtkoba
|
||||
* [linuxacadamy] Fix login
|
||||
|
||||
|
||||
### 2021.03.21
|
||||
* Merge youtube-dl: Upto [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881)
|
||||
* Option `--no-clean-infojson` to keep private keys in the infojson
|
||||
* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* [aria2c] Better default arguments
|
||||
* [movefiles] Fix bugs and make more robust
|
||||
* [formatSort] Fix `quality` being ignored
|
||||
* [splitchapters] Fix for older ffmpeg
|
||||
* [sponskrub] Pass proxy to sponskrub
|
||||
* Make sure `post_hook` gets the final filename
|
||||
* Recursively remove any private keys from infojson
|
||||
* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
|
||||
* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
|
||||
* [documentation] Add deprecated options and aliases in readme
|
||||
* [documentation] Fix some minor mistakes
|
||||
|
||||
* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
|
||||
* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
|
||||
* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
|
||||
* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
|
||||
* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
|
||||
* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
|
||||
* [linuxacadamy] Improve regex
|
||||
* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube] bugfix for channel playlist extraction
|
||||
* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)
|
||||
|
||||
|
||||
### 2021.03.15
|
||||
* **Split video by chapters**: using option `--split-chapters`
|
||||
* The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:`
|
||||
* Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
|
||||
* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev)
|
||||
* Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
|
||||
* Merge youtube-dl: Upto [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
|
||||
* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
|
||||
* [rai] fix drm check [nixxo](https://github.com/nixxo)
|
||||
* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
|
||||
* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
|
||||
* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
|
||||
* [youtube] Rewrite comment extraction by [colethedj](https://github.com/colethedj)
|
||||
* [embedthumbnail] Set mtime correctly
|
||||
* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
|
||||
|
||||
|
||||
### 2021.03.07
|
||||
* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Fix private feeds/playlists on multi-channel accounts by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Extract alerts from continuation by [colethedj](https://github.com/colethedj)
|
||||
* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
|
||||
* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
|
||||
* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
|
||||
* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
|
||||
* [lbry] Support `lbry://` url by [nixxo](https://github.com/nixxo)
|
||||
* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984)
|
||||
* [trovo] Pass origin header
|
||||
* [rai] Check for DRM by [nixxo](https://github.com/nixxo)
|
||||
* [downloader] Fix bug for `ffmpeg`/`httpie`
|
||||
* [update] Fix updater removing the executable bit on some UNIX distros
|
||||
* [update] Fix current build hash for UNIX
|
||||
* [documentation] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
|
||||
* Fix some videos downloading with `m3u8` extension
|
||||
* Remove "fixup is ignored" warning when fixup wasn't passed by user
|
||||
|
||||
|
||||
### 2021.03.03.2
|
||||
* [build] Fix bug
|
||||
|
||||
@@ -24,10 +106,10 @@ ### 2021.03.03.2
|
||||
### 2021.03.03
|
||||
* [youtube] Use new browse API for continuation page extraction by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
|
||||
* **Merge youtube-dl:** Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
|
||||
* Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
|
||||
* [mtv] Fix extractor
|
||||
* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai)
|
||||
* [mxplayer] Add new extractor by[codeasashu](https://github.com/codeasashu)
|
||||
* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu)
|
||||
* [youtube] Throw error when `--extractor-retries` are exhausted
|
||||
* Reduce default of `--extractor-retries` to 3
|
||||
* Fix packaging bugs by [hseg](https://github.com/hseg)
|
||||
@@ -59,10 +141,10 @@ ### 2021.02.24
|
||||
* Moved project to an organization [yt-dlp](https://github.com/yt-dlp)
|
||||
* **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan)
|
||||
* Also, `youtube-dlc` config files are no longer loaded
|
||||
* **Merge youtube-dl:** Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
|
||||
* Merge youtube-dl: Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
|
||||
* [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev)
|
||||
* [youtube] Show if video was a live stream in info (`was_live`)
|
||||
* [Zee5] Add new extractor by [Ashish](https://github.com/Ashish) and [pukkandan](https://github.com/pukkandan)
|
||||
* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
|
||||
* [jwplatform] Add support for `hyland.com`
|
||||
* [tennistv] Fix extractor
|
||||
* [hls] Support media initialization by [shirt](https://github.com/shirt-dev)
|
||||
@@ -77,7 +159,7 @@ ### 2021.02.24
|
||||
|
||||
|
||||
### 2021.02.19
|
||||
* **Merge youtube-dl:** Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
|
||||
* Merge youtube-dl: Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
|
||||
* [viki] Fix extractor
|
||||
* [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi)
|
||||
* [youtube] Multiple page support for hashtag URLs
|
||||
@@ -102,7 +184,7 @@ ### 2021.02.19
|
||||
|
||||
|
||||
### 2021.02.15
|
||||
* **Merge youtube-dl:** Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
|
||||
* Merge youtube-dl: Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
|
||||
* [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumi](https://github.com/tsukumi), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan)
|
||||
* Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev)
|
||||
* [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika)
|
||||
@@ -145,7 +227,7 @@ ### 2021.02.09
|
||||
|
||||
|
||||
### 2021.02.04
|
||||
* **Merge youtube-dl:** Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
|
||||
* Merge youtube-dl: Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
|
||||
* **Date/time formatting in output template:**
|
||||
* You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s`
|
||||
* **Multiple output templates:**
|
||||
@@ -199,7 +281,7 @@ ### 2021.01.29
|
||||
|
||||
|
||||
### 2021.01.24
|
||||
* **Merge youtube-dl:** Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
|
||||
* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
|
||||
* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
|
||||
* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
|
||||
* The syntax is `-P "type:path" -P "type:path"` ([documentation](https://github.com/yt-dlp/yt-dlp#:~:text=-P,%20--paths%20TYPE:PATH))
|
||||
@@ -228,7 +310,7 @@ ### 2021.01.20
|
||||
|
||||
|
||||
### 2021.01.16
|
||||
* **Merge youtube-dl:** Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
|
||||
* Merge youtube-dl: Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
|
||||
* **Configuration files:**
|
||||
* Portable configuration file: `./yt-dlp.conf`
|
||||
* Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
|
||||
@@ -258,7 +340,7 @@ ### 2021.01.10
|
||||
* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
|
||||
* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
|
||||
* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
|
||||
* [youtube] Show if video is embeddable in info
|
||||
* [youtube] Show if video is embeddable in info (`playable_in_embed`)
|
||||
* Update version badge automatically in README
|
||||
* Enable `test_youtube_search_matching`
|
||||
* Create `to_screen` and similar functions in postprocessor/common
|
||||
@@ -274,8 +356,7 @@ ### 2021.01.09
|
||||
|
||||
|
||||
### 2021.01.08
|
||||
* **Merge youtube-dl:** Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08)
|
||||
* Extractor stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f)) have not been merged
|
||||
* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
|
||||
* Moved changelog to seperate file
|
||||
|
||||
|
||||
@@ -314,7 +395,7 @@ ### 2021.01.05
|
||||
* Changed video format sorting to show video only files and video+audio files together.
|
||||
* Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
|
||||
* Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
|
||||
* **Shortcut Options:** Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
|
||||
* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
|
||||
* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
|
||||
* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
|
||||
* Added `--list-formats-as-table`, `--list-formats-old`
|
||||
@@ -324,36 +405,38 @@ ### 2021.01.05
|
||||
* Relaxed validation for format filters so that any arbitrary field can be used
|
||||
* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
|
||||
* Make Twitch Video ID output from Playlist and VOD extractor same. This is only a temporary fix
|
||||
* **Merge youtube-dl:** Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
|
||||
* Merge youtube-dl: Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
|
||||
* Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
|
||||
* Cleaned up the fork for public use
|
||||
|
||||
|
||||
**PS**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
* Updated to youtube-dl release 2020.11.26
|
||||
* [youtube]
|
||||
* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
|
||||
* Youtube improvements by [pukkandan](https://github.com/pukkandan)
|
||||
* Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
|
||||
* Fix ytsearch not returning results sometimes due to promoted content
|
||||
* Temporary fix for automatic captions - disable json3
|
||||
* Fix some improper Youtube URLs
|
||||
* Redirect channel home to /video
|
||||
* Print youtube's warning message
|
||||
* Multiple pages are handled better for feeds
|
||||
* Handle Multiple pages for feeds better
|
||||
* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
|
||||
* Add --break-on-existing by [gergesh](https://github.com/gergesh)
|
||||
* Pre-check video IDs in the archive before downloading
|
||||
* [bitwave.tv] New extractor
|
||||
* [Gedi] Add extractor
|
||||
* [Rcs] Add new extractor
|
||||
* [skyit] Add support for multiple Sky Italia website and removed old skyitalia extractor
|
||||
* [france.tv] Fix thumbnail URL
|
||||
* [ina] support mobile links
|
||||
* [instagram] Fix extractor
|
||||
* [itv] BTCC new pages' URL update (articles instead of races)
|
||||
* [SouthparkDe] Support for English URLs
|
||||
* [spreaker] fix SpreakerShowIE test URL
|
||||
* [Vlive] Fix playlist handling when downloading a channel
|
||||
* [generic] Detect embedded bitchute videos
|
||||
* [generic] Extract embedded youtube and twitter videos
|
||||
* [ffmpeg] Ensure all streams are copied
|
||||
* Fix for os.rename error when embedding thumbnail to video in a different drive
|
||||
* make_win.bat: don't use UPX to pack vcruntime140.dll
|
||||
* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
|
||||
* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
|
||||
* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
|
||||
* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
|
||||
* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
|
||||
* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
|
||||
* [ina] support mobile links by [B0pol](https://github.com/B0pol)
|
||||
* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
|
||||
* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
|
||||
* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
|
||||
* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
|
||||
* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
|
||||
* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
|
||||
* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
|
||||
* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
|
||||
* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)
|
||||
186
README.md
186
README.md
@@ -3,7 +3,7 @@ # YT-DLP
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](LICENSE)
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions)
|
||||
[](https://discord.gg/S75JaBna)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
@@ -13,7 +13,7 @@ # YT-DLP
|
||||
|
||||
A command-line program to download videos from youtube.com and many other [video platforms](supportedsites.md)
|
||||
|
||||
This is a fork of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) which is inturn a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
This is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
|
||||
|
||||
* [NEW FEATURES](#new-features)
|
||||
* [INSTALLATION](#installation)
|
||||
@@ -47,6 +47,7 @@ # YT-DLP
|
||||
* [Sorting Formats](#sorting-formats)
|
||||
* [Format Selection examples](#format-selection-examples)
|
||||
* [PLUGINS](#plugins)
|
||||
* [DEPRECATED OPTIONS](#deprecated-options)
|
||||
* [MORE](#more)
|
||||
|
||||
|
||||
@@ -57,7 +58,7 @@ # NEW FEATURES
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection that what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with youtube-dl v2021.03.03**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
* **Merged with youtube-dl v2021.03.25**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--get-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, Playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
@@ -66,17 +67,19 @@ # NEW FEATURES
|
||||
* Youtube search (`ytsearch:`, `ytsearchdate:`) along with Search URLs works correctly
|
||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
||||
|
||||
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
|
||||
|
||||
* **Multithreaded fragment downloads**: Fragment downloads can be natively multi-threaded. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
|
||||
* **Aria2c with HLS/DASH**: You can use aria2c as the external downloader for DASH(mpd) and HLS(m3u8) formats. No more slow ffmpeg/native downloads
|
||||
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv
|
||||
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, tiktok, akamai, ina, rumble, tennistv
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, tiktok, akamai, ina, rumble, tennistv, amcnetworks
|
||||
|
||||
* **Plugin support**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
|
||||
* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to. See [`--paths`](https://github.com/yt-dlp/yt-dlp/#:~:text=-P,%20--paths%20TYPE:PATH) for details
|
||||
|
||||
<!-- Relative link doesn't work for "#:~:text=" -->
|
||||
* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
|
||||
|
||||
* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [configuration](#configuration) for details
|
||||
|
||||
@@ -103,6 +106,23 @@ # INSTALLATION
|
||||
* Use pip+git: `python -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release`
|
||||
* Install master branch: `python -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp`
|
||||
|
||||
UNIX users (Linux, macOS, BSD) can also install the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) one of the following ways:
|
||||
|
||||
```
|
||||
sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
|
||||
sudo chmod a+rx /usr/local/bin/yt-dlp
|
||||
```
|
||||
|
||||
```
|
||||
sudo wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp
|
||||
sudo chmod a+rx /usr/local/bin/yt-dlp
|
||||
```
|
||||
|
||||
```
|
||||
sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
|
||||
sudo chmod a+rx /usr/local/bin/yt-dlp
|
||||
```
|
||||
|
||||
### UPDATE
|
||||
Starting from version `2021.02.09`, you can use `yt-dlp -U` to update if you are using the provided release.
|
||||
If you are using `pip`, simply re-run the same command that was used to install the program.
|
||||
@@ -177,7 +197,7 @@ ## General Options:
|
||||
only list them
|
||||
--no-flat-playlist Extract the videos of a playlist
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched
|
||||
--no-mark-watched Do not mark videos watched (default)
|
||||
--no-colors Do not emit color codes in output
|
||||
|
||||
## Network Options:
|
||||
@@ -280,6 +300,8 @@ ## Video Selection:
|
||||
--no-include-ads Do not download advertisements (default)
|
||||
|
||||
## Download Options:
|
||||
-N, --concurrent-fragments N Number of fragments to download
|
||||
concurrently (default is 1)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
@@ -402,6 +424,11 @@ ## Filesystem Options:
|
||||
--write-description etc. (default)
|
||||
--no-write-playlist-metafiles Do not write playlist metadata when using
|
||||
--write-info-json, --write-description etc.
|
||||
--clean-infojson Remove some private fields such as
|
||||
filenames from the infojson. Note that it
|
||||
could still contain some personal
|
||||
information (default)
|
||||
--no-clean-infojson Write all fields to the infojson
|
||||
--get-comments Retrieve video comments to be placed in the
|
||||
.info.json file. The comments are fetched
|
||||
even without this option if the extraction
|
||||
@@ -445,7 +472,8 @@ ## Verbosity and Simulation Options:
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
--skip-download Do not download the video
|
||||
--skip-download Do not download the video but write all
|
||||
related files (Alias: --no-download)
|
||||
-g, --get-url Simulate, quiet but print URL
|
||||
-e, --get-title Simulate, quiet but print title
|
||||
--get-id Simulate, quiet but print id
|
||||
@@ -482,7 +510,7 @@ ## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
information about the video (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video
|
||||
@@ -496,15 +524,11 @@ ## Workarounds:
|
||||
--sleep-requests SECONDS Number of seconds to sleep between requests
|
||||
during data extraction
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download when used alone or a lower bound
|
||||
of a range for randomized sleep before each
|
||||
download (minimum possible number of
|
||||
seconds to sleep) when used along with
|
||||
--max-sleep-interval
|
||||
--max-sleep-interval SECONDS Upper bound of a range for randomized sleep
|
||||
before each download (maximum possible
|
||||
number of seconds to sleep). Must only be
|
||||
used along with --min-sleep-interval
|
||||
download. This is the minimum time to sleep
|
||||
when used along with --max-sleep-interval
|
||||
(Alias: --min-sleep-interval)
|
||||
--max-sleep-interval SECONDS Maximum number of seconds to sleep. Can
|
||||
only be used along with --min-sleep-interval
|
||||
--sleep-subtitles SECONDS Number of seconds to sleep before each
|
||||
subtitle download
|
||||
|
||||
@@ -554,16 +578,16 @@ ## Subtitle Options:
|
||||
--write-subs Write subtitle file
|
||||
--no-write-subs Do not write subtitle file (default)
|
||||
--write-auto-subs Write automatically generated subtitle file
|
||||
(YouTube only)
|
||||
--no-write-auto-subs Do not write automatically generated
|
||||
subtitle file (default)
|
||||
(Alias: --write-automatic-subs)
|
||||
--no-write-auto-subs Do not write auto-generated subtitles
|
||||
(default) (Alias: --no-write-automatic-subs)
|
||||
--all-subs Download all the available subtitles of the
|
||||
video
|
||||
--list-subs List all available subtitles for the video
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
--sub-lang LANGS Languages of the subtitles to download
|
||||
--sub-langs LANGS Languages of the subtitles to download
|
||||
(optional) separated by commas, use --list-
|
||||
subs for available language tags
|
||||
|
||||
@@ -617,18 +641,19 @@ ## Post-Processing Options:
|
||||
ExtractAudio, VideoRemuxer, VideoConvertor,
|
||||
EmbedSubtitle, Metadata, Merger,
|
||||
FixupStretched, FixupM4a, FixupM3u8,
|
||||
SubtitlesConvertor and EmbedThumbnail. The
|
||||
supported executables are: SponSkrub,
|
||||
FFmpeg, FFprobe, and AtomicParsley. You can
|
||||
also specify "PP+EXE:ARGS" to give the
|
||||
arguments to the specified executable only
|
||||
when being used by the specified
|
||||
postprocessor. Additionally, for
|
||||
ffmpeg/ffprobe, a number can be appended to
|
||||
the exe name seperated by "_i" to pass the
|
||||
argument before the specified input file.
|
||||
Eg: --ppa "Merger+ffmpeg_i1:-v quiet". You
|
||||
can use this option multiple times to give
|
||||
SubtitlesConvertor, EmbedThumbnail and
|
||||
SplitChapters. The supported executables
|
||||
are: SponSkrub, FFmpeg, FFprobe, and
|
||||
AtomicParsley. You can also specify
|
||||
"PP+EXE:ARGS" to give the arguments to the
|
||||
specified executable only when being used
|
||||
by the specified postprocessor.
|
||||
Additionally, for ffmpeg/ffprobe, "_i"/"_o"
|
||||
can be appended to the prefix optionally
|
||||
followed by a number to pass the argument
|
||||
before the specified input/output file. Eg:
|
||||
--ppa "Merger+ffmpeg_i1:-v quiet". You can
|
||||
use this option multiple times to give
|
||||
different arguments to different
|
||||
postprocessors. (Alias: --ppa)
|
||||
-k, --keep-video Keep the intermediate video file on disk
|
||||
@@ -645,18 +670,24 @@ ## Post-Processing Options:
|
||||
--add-metadata Write metadata to the video file
|
||||
--no-add-metadata Do not write metadata (default)
|
||||
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
|
||||
from other fields. Give field name to
|
||||
extract data from, and format of the field
|
||||
seperated by a ":". Either regular
|
||||
expression with named capture groups or a
|
||||
similar syntax to the output template can
|
||||
also be used. The parsed parameters replace
|
||||
any existing values and can be use in
|
||||
output template. This option can be used
|
||||
multiple times. Example: --parse-metadata
|
||||
"title:%(artist)s - %(title)s" matches a
|
||||
title like "Coldplay - Paradise". Example
|
||||
(regex): --parse-metadata
|
||||
from other fields. Give a template or field
|
||||
name to extract data from and the format to
|
||||
interpret it as, seperated by a ":". Either
|
||||
regular expression with named capture
|
||||
groups or a similar syntax to the output
|
||||
template can be used for the FORMAT.
|
||||
Similarly, the syntax for output template
|
||||
can be used for FIELD to parse the data
|
||||
from multiple fields. The parsed parameters
|
||||
replace any existing values and can be used
|
||||
in output templates. This option can be
|
||||
used multiple times. Example: --parse-
|
||||
metadata "title:%(artist)s - %(title)s"
|
||||
matches a title like "Coldplay - Paradise".
|
||||
Example: --parse-metadata "%(series)s
|
||||
%(episode_number)s:%(title)s" sets the
|
||||
title using series and episode number.
|
||||
Example (regex): --parse-metadata
|
||||
"description:Artist - (?P<artist>.+?)"
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
@@ -672,8 +703,16 @@ ## Post-Processing Options:
|
||||
downloading and post-processing, similar to
|
||||
find's -exec syntax. Example: --exec 'adb
|
||||
push {} /sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
--split-chapters Split video into multiple files based on
|
||||
internal chapters. The "chapter:" prefix
|
||||
can be used with "--paths" and "--output"
|
||||
to set the output filename for the split
|
||||
files. See "OUTPUT TEMPLATE" for details
|
||||
--no-split-chapters Do not split video based on chapters
|
||||
(default)
|
||||
|
||||
## SponSkrub (SponsorBlock) Options:
|
||||
[SponSkrub](https://github.com/yt-dlp/SponSkrub) is a utility to
|
||||
@@ -789,9 +828,9 @@ # OUTPUT TEMPLATE
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses seperated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
|
||||
The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses seperated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files seperately from the general output template by specifying the type of file followed by the template seperated by a colon ":". The different filetypes supported are `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
Additionally, you can set different output templates for the various metadata files seperately from the general output template by specifying the type of file followed by the template seperated by a colon ":". The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
|
||||
The available fields are:
|
||||
|
||||
@@ -800,6 +839,7 @@ # OUTPUT TEMPLATE
|
||||
- `url` (string): Video URL
|
||||
- `ext` (string): Video filename extension
|
||||
- `alt_title` (string): A secondary title of the video
|
||||
- `description` (string): The description of the video
|
||||
- `display_id` (string): An alternative identifier for the video
|
||||
- `uploader` (string): Full name of the video uploader
|
||||
- `license` (string): License name the video is licensed under
|
||||
@@ -823,6 +863,7 @@ # OUTPUT TEMPLATE
|
||||
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
||||
- `was_live` (boolean): Whether this video was originally a live stream
|
||||
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
||||
- `availability` (string): Whether the video is 'private', 'premium_only', 'subscriber_only', 'needs_auth', 'unlisted' or 'public'
|
||||
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
||||
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
||||
- `format` (string): A human-readable description of the format
|
||||
@@ -882,6 +923,13 @@ # OUTPUT TEMPLATE
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Available when using `--split-chapters` for videos with internal chapters:
|
||||
|
||||
- `section_title` (string): Title of the chapter
|
||||
- `section_number` (numeric): Number of the chapter within the file
|
||||
- `section_start` (numeric): Start time of the chapter in seconds
|
||||
- `section_end` (numeric): End time of the chapter in seconds
|
||||
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKcj`, this will result in a `yt-dlp test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
@@ -1161,5 +1209,39 @@ # PLUGINS
|
||||
|
||||
**Note**: `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root dir>/yt_dlp/__main__.py`)
|
||||
|
||||
# DEPRECATED OPTIONS
|
||||
|
||||
These are all the deprecated options and the current alternative to achieve the same effect
|
||||
|
||||
--cn-verification-proxy URL --geo-verification-proxy URL
|
||||
--id -o "%(id)s.%(ext)s"
|
||||
-A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s"
|
||||
-t, --title -o "%(title)s-%(id)s.%(ext)s"
|
||||
-l, --literal -o accepts literal names
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--metadata-from-title FORMAT --parse-metadata "title:FORMAT"
|
||||
--prefer-avconv avconv is no longer officially supported (Alias: --no-prefer-ffmpeg)
|
||||
--prefer-ffmpeg Default (Alias: --no-prefer-avconv)
|
||||
--avconv-location avconv is no longer officially supported
|
||||
-C, --call-home Not implemented
|
||||
--no-call-home Default
|
||||
--write-srt --write-subs
|
||||
--no-write-srt --no-write-subs
|
||||
--srt-lang LANGS --sub-langs LANGS
|
||||
--prefer-unsecure --prefer-insecure
|
||||
--rate-limit RATE --limit-rate RATE
|
||||
--force-write-download-archive --force-write-archive
|
||||
--dump-intermediate-pages --dump-pages
|
||||
--dump-headers --print-traffic
|
||||
--youtube-print-sig-code No longer supported
|
||||
--trim-file-names LENGTH --trim-filenames LENGTH
|
||||
--yes-overwrites --force-overwrites
|
||||
--load-info --load-info-json
|
||||
--split-tracks --split-chapters
|
||||
--no-split-tracks --no-split-chapters
|
||||
--sponskrub-args ARGS --ppa "sponskrub:ARGS"
|
||||
--test Only used for testing extractors
|
||||
|
||||
|
||||
# MORE
|
||||
For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl#faq)
|
||||
|
||||
@@ -97,7 +97,8 @@ # Supported sites
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:iplayer:episodes**
|
||||
- **bbc.co.uk:iplayer:group**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BBVTV**
|
||||
- **Beatport**
|
||||
@@ -347,8 +348,7 @@ # Supported sites
|
||||
- **Gaskrank**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **Gedi**
|
||||
- **GediEmbeds**
|
||||
- **GediDigital**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
@@ -544,6 +544,7 @@ # Supported sites
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
- **MLB**
|
||||
- **MLBVideo**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
@@ -562,6 +563,8 @@ # Supported sites
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv.it**
|
||||
- **mtv.it:programma**
|
||||
- **mtv:video**
|
||||
- **mtvjapan**
|
||||
- **mtvservices:embedded**
|
||||
@@ -633,6 +636,7 @@ # Supported sites
|
||||
- **nicknight**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NiconicoUser**
|
||||
- **Nintendo**
|
||||
- **Nitter**
|
||||
- **njoy**: N-JOY
|
||||
@@ -735,6 +739,7 @@ # Supported sites
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **PlutoTV**
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PokemonWatch**
|
||||
@@ -915,6 +920,7 @@ # Supported sites
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **StitcherShow**
|
||||
- **StoryFire**
|
||||
- **StoryFireSeries**
|
||||
- **StoryFireUser**
|
||||
@@ -1172,6 +1178,7 @@ # Supported sites
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **WimTV**
|
||||
- **Wistia**
|
||||
- **WistiaPlaylist**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
@@ -1242,7 +1249,9 @@ # Supported sites
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **Zee5**
|
||||
- **zee5:series**
|
||||
- **Zhihu**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **zingmp3:album**
|
||||
- **zoom**
|
||||
- **Zype**
|
||||
|
||||
@@ -37,7 +37,6 @@ def test_youtube_playlist_matching(self):
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/AsapSCIENCE')
|
||||
assertTab('https://www.youtube.com/embedded')
|
||||
assertTab('https://www.youtube.com/feed') # Own channel's home page
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
|
||||
@@ -60,12 +60,14 @@
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
EntryNotInPlaylist,
|
||||
ExistingVideoReached,
|
||||
expand_path,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
format_field,
|
||||
FORMAT_RE,
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
@@ -216,6 +218,7 @@ class YoutubeDL(object):
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
writedescription: Write the video description to a .description file
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
clean_infojson: Remove private fields from the infojson
|
||||
writecomments: Extract video comments. This will not be written to disk
|
||||
unless writeinfojson is also given
|
||||
writeannotations: Write the video annotations to a .annotations.xml file
|
||||
@@ -770,95 +773,93 @@ def parse_outtmpl(self):
|
||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||
return outtmpl_dict
|
||||
|
||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
|
||||
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
|
||||
template_dict = dict(info_dict)
|
||||
|
||||
# duration_string
|
||||
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
||||
formatSeconds(info_dict['duration'], '-')
|
||||
if info_dict.get('duration', None) is not None
|
||||
else None)
|
||||
|
||||
# epoch
|
||||
template_dict['epoch'] = int(time.time())
|
||||
|
||||
# autonumber
|
||||
autonumber_size = self.params.get('autonumber_size')
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
||||
|
||||
# resolution if not defined
|
||||
if template_dict.get('resolution') is None:
|
||||
if template_dict.get('width') and template_dict.get('height'):
|
||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||
elif template_dict.get('height'):
|
||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '%dx?' % template_dict['width']
|
||||
|
||||
if sanitize is None:
|
||||
sanitize = lambda k, v: v
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
template_dict = collections.defaultdict(lambda: na, template_dict)
|
||||
|
||||
# For fields playlist_index and autonumber convert all occurrences
|
||||
# of %(field)s to %(field)0Nd for backward compatibility
|
||||
field_size_compat_map = {
|
||||
'playlist_index': len(str(template_dict['n_entries'])),
|
||||
'autonumber': autonumber_size,
|
||||
}
|
||||
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
||||
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
|
||||
if mobj:
|
||||
outtmpl = re.sub(
|
||||
FIELD_SIZE_COMPAT_RE,
|
||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||
outtmpl)
|
||||
|
||||
numeric_fields = list(self._NUMERIC_FIELDS)
|
||||
|
||||
# Format date
|
||||
FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
|
||||
for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
|
||||
conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
|
||||
if key in template_dict:
|
||||
continue
|
||||
value = strftime_or_none(template_dict.get(field), frmt, na)
|
||||
if conv_type in 'crs': # string
|
||||
value = sanitize(field, value)
|
||||
else: # number
|
||||
numeric_fields.append(key)
|
||||
value = float_or_none(value, default=None)
|
||||
if value is not None:
|
||||
template_dict[key] = value
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in numeric_fields:
|
||||
if numeric_field not in template_dict:
|
||||
outtmpl = re.sub(
|
||||
FORMAT_RE.format(re.escape(numeric_field)),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
return outtmpl, template_dict
|
||||
|
||||
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||
try:
|
||||
template_dict = dict(info_dict)
|
||||
|
||||
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
||||
formatSeconds(info_dict['duration'], '-')
|
||||
if info_dict.get('duration', None) is not None
|
||||
else None)
|
||||
|
||||
template_dict['epoch'] = int(time.time())
|
||||
autonumber_size = self.params.get('autonumber_size')
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
||||
if template_dict.get('resolution') is None:
|
||||
if template_dict.get('width') and template_dict.get('height'):
|
||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||
elif template_dict.get('height'):
|
||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '%dx?' % template_dict['width']
|
||||
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
compat_str(v),
|
||||
restricted=self.params.get('restrictfilenames'),
|
||||
is_id=(k == 'id' or k.endswith('_id')))
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
template_dict = collections.defaultdict(lambda: na, template_dict)
|
||||
|
||||
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
|
||||
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
||||
|
||||
# For fields playlist_index and autonumber convert all occurrences
|
||||
# of %(field)s to %(field)0Nd for backward compatibility
|
||||
field_size_compat_map = {
|
||||
'playlist_index': len(str(template_dict['n_entries'])),
|
||||
'autonumber': autonumber_size,
|
||||
}
|
||||
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
||||
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
|
||||
if mobj:
|
||||
outtmpl = re.sub(
|
||||
FIELD_SIZE_COMPAT_RE,
|
||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||
outtmpl)
|
||||
|
||||
# As of [1] format syntax is:
|
||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
||||
FORMAT_RE = r'''(?x)
|
||||
(?<!%)
|
||||
%
|
||||
\({0}\) # mapping key
|
||||
(?:[#0\-+ ]+)? # conversion flags (optional)
|
||||
(?:\d+)? # minimum field width (optional)
|
||||
(?:\.\d+)? # precision (optional)
|
||||
[hlL]? # length modifier (optional)
|
||||
(?P<type>[diouxXeEfFgGcrs%]) # conversion type
|
||||
'''
|
||||
|
||||
numeric_fields = list(self._NUMERIC_FIELDS)
|
||||
|
||||
# Format date
|
||||
FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
|
||||
for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
|
||||
conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
|
||||
if key in template_dict:
|
||||
continue
|
||||
value = strftime_or_none(template_dict.get(field), frmt, na)
|
||||
if conv_type in 'crs': # string
|
||||
value = sanitize(field, value)
|
||||
else: # number
|
||||
numeric_fields.append(key)
|
||||
value = float_or_none(value, default=None)
|
||||
if value is not None:
|
||||
template_dict[key] = value
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in numeric_fields:
|
||||
if numeric_field not in template_dict:
|
||||
outtmpl = re.sub(
|
||||
FORMAT_RE.format(re.escape(numeric_field)),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
|
||||
|
||||
# expand_path translates '%%' into '%' and '$$' into '$'
|
||||
# correspondingly that is not what we want since we need to keep
|
||||
@@ -873,6 +874,7 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||
# title "Hello $PATH", we don't want `$PATH` to be expanded.
|
||||
filename = expand_path(outtmpl).replace(sep, '') % template_dict
|
||||
|
||||
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
||||
if force_ext is not None:
|
||||
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
|
||||
|
||||
@@ -1171,57 +1173,24 @@ def _fixup(r):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def _ensure_dir_exists(self, path):
|
||||
return make_dir(path, self.report_error)
|
||||
|
||||
def __process_playlist(self, ie_result, download):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
if self.params.get('allow_playlist_files', True):
|
||||
ie_copy = {
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': 0
|
||||
}
|
||||
ie_copy.update(dict(ie_result))
|
||||
|
||||
def ensure_dir_exists(path):
|
||||
return make_dir(path, self.report_error)
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
||||
if not ensure_dir_exists(encodeFilename(infofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Playlist metadata is already present')
|
||||
else:
|
||||
playlist_info = dict(ie_result)
|
||||
# playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
|
||||
del playlist_info['entries']
|
||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(playlist_info), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
||||
if not ensure_dir_exists(encodeFilename(descfn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Playlist description is already present')
|
||||
elif ie_result.get('description') is None:
|
||||
self.report_warning('There\'s no playlist description to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(ie_result['description'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist description file ' + descfn)
|
||||
return
|
||||
if 'entries' not in ie_result:
|
||||
raise EntryNotInPlaylist()
|
||||
incomplete_entries = bool(ie_result.get('requested_entries'))
|
||||
if incomplete_entries:
|
||||
def fill_missing_entries(entries, indexes):
|
||||
ret = [None] * max(*indexes)
|
||||
for i, entry in zip(indexes, entries):
|
||||
ret[i - 1] = entry
|
||||
return ret
|
||||
ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
|
||||
|
||||
playlist_results = []
|
||||
|
||||
@@ -1248,25 +1217,20 @@ def iter_playlistitems(format):
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
for i in playlistitems:
|
||||
if -num_entries < i <= num_entries:
|
||||
yield list_ie_entries[i - 1]
|
||||
elif incomplete_entries:
|
||||
raise EntryNotInPlaylist()
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
entries = list(make_playlistitems_entries(ie_entries))
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
@@ -1278,25 +1242,73 @@ def report_download(num_entries):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
msg = 'Downloading %d videos' % n_entries
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
entries = list(make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems)))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
msg = 'Downloading %d videos' % n_entries
|
||||
|
||||
if any((entry is None for entry in entries)):
|
||||
raise EntryNotInPlaylist()
|
||||
if not playlistitems and (playliststart or playlistend):
|
||||
playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
|
||||
ie_result['entries'] = entries
|
||||
ie_result['requested_entries'] = playlistitems
|
||||
|
||||
if self.params.get('allow_playlist_files', True):
|
||||
ie_copy = {
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': 0
|
||||
}
|
||||
ie_copy.update(dict(ie_result))
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Playlist metadata is already present')
|
||||
else:
|
||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
||||
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Playlist description is already present')
|
||||
elif ie_result.get('description') is None:
|
||||
self.report_warning('There\'s no playlist description to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(ie_result['description'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist description file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
@@ -1310,7 +1322,7 @@ def report_download(num_entries):
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
@@ -1644,7 +1656,7 @@ def _merge(formats_pair):
|
||||
new_dict.update({
|
||||
'width': the_only_video.get('width'),
|
||||
'height': the_only_video.get('height'),
|
||||
'resolution': the_only_video.get('resolution'),
|
||||
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
|
||||
'fps': the_only_video.get('fps'),
|
||||
'vcodec': the_only_video.get('vcodec'),
|
||||
'vbr': the_only_video.get('vbr'),
|
||||
@@ -1794,14 +1806,18 @@ def sanitize_numeric_fields(info):
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
for ts_key, date_key in (
|
||||
('timestamp', 'upload_date'),
|
||||
('release_timestamp', 'release_date'),
|
||||
):
|
||||
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
@@ -2043,7 +2059,7 @@ def print_optional(field):
|
||||
print_mandatory('format')
|
||||
if self.params.get('forcejson', False):
|
||||
self.post_extract(info_dict)
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
self.to_stdout(json.dumps(info_dict, default=repr))
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
@@ -2071,6 +2087,7 @@ def process_info(self, info_dict):
|
||||
|
||||
info_dict = self.pre_process(info_dict)
|
||||
|
||||
# info_dict['_filename'] needs to be set for backward compatibility
|
||||
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
|
||||
temp_filename = self.prepare_filename(info_dict, 'temp')
|
||||
files_to_move = {}
|
||||
@@ -2089,17 +2106,14 @@ def process_info(self, info_dict):
|
||||
if full_filename is None:
|
||||
return
|
||||
|
||||
def ensure_dir_exists(path):
|
||||
return make_dir(path, self.report_error)
|
||||
|
||||
if not ensure_dir_exists(encodeFilename(full_filename)):
|
||||
if not self._ensure_dir_exists(encodeFilename(full_filename)):
|
||||
return
|
||||
if not ensure_dir_exists(encodeFilename(temp_filename)):
|
||||
if not self._ensure_dir_exists(encodeFilename(temp_filename)):
|
||||
return
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = self.prepare_filename(info_dict, 'description')
|
||||
if not ensure_dir_exists(encodeFilename(descfn)):
|
||||
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Video description is already present')
|
||||
@@ -2116,7 +2130,7 @@ def ensure_dir_exists(path):
|
||||
|
||||
if self.params.get('writeannotations', False):
|
||||
annofn = self.prepare_filename(info_dict, 'annotation')
|
||||
if not ensure_dir_exists(encodeFilename(annofn)):
|
||||
if not self._ensure_dir_exists(encodeFilename(annofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
@@ -2139,7 +2153,10 @@ def dl(name, info, subtitle=False):
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
return fd.download(name, info, subtitle)
|
||||
new_info = dict(info)
|
||||
if new_info.get('http_headers') is None:
|
||||
new_info['http_headers'] = self._calc_headers(new_info)
|
||||
return fd.download(name, new_info, subtitle)
|
||||
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub')])
|
||||
@@ -2158,6 +2175,7 @@ def dl(name, info, subtitle=False):
|
||||
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
sub_info['filepath'] = sub_filename
|
||||
files_to_move[sub_filename] = sub_filename_final
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
@@ -2167,13 +2185,15 @@ def dl(name, info, subtitle=False):
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_info['data'])
|
||||
sub_info['filepath'] = sub_filename
|
||||
files_to_move[sub_filename] = sub_filename_final
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
else:
|
||||
try:
|
||||
dl(sub_filename, sub_info, subtitle=True)
|
||||
dl(sub_filename, sub_info.copy(), subtitle=True)
|
||||
sub_info['filepath'] = sub_filename
|
||||
files_to_move[sub_filename] = sub_filename_final
|
||||
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
@@ -2204,14 +2224,14 @@ def dl(name, info, subtitle=False):
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(info_dict, 'infojson')
|
||||
if not ensure_dir_exists(encodeFilename(infofn)):
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Video metadata is already present')
|
||||
else:
|
||||
self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(info_dict), infofn)
|
||||
write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write video metadata to JSON file ' + infofn)
|
||||
return
|
||||
@@ -2222,7 +2242,7 @@ def dl(name, info, subtitle=False):
|
||||
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
|
||||
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
|
||||
files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
|
||||
files_to_move[thumb_filename_temp] = thumb_filename
|
||||
|
||||
# Write internet shortcut files
|
||||
url_link = webloc_link = desktop_link = False
|
||||
@@ -2335,10 +2355,17 @@ def compatible_formats(formats):
|
||||
|
||||
requested_formats = info_dict['requested_formats']
|
||||
old_ext = info_dict['ext']
|
||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||
if self.params.get('merge_output_format') is None:
|
||||
if not compatible_formats(requested_formats):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||
if (info_dict['ext'] == 'webm'
|
||||
and self.params.get('writethumbnail', False)
|
||||
and info_dict.get('thumbnails')):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'webm doesn\'t support embedding a thumbnail, mkv will be used.')
|
||||
|
||||
def correct_ext(filename):
|
||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
||||
@@ -2360,7 +2387,7 @@ def correct_ext(filename):
|
||||
fname = prepend_extension(
|
||||
self.prepare_filename(new_info, 'temp'),
|
||||
'f%s' % f['format_id'], new_info['ext'])
|
||||
if not ensure_dir_exists(fname):
|
||||
if not self._ensure_dir_exists(fname):
|
||||
return
|
||||
downloaded.append(fname)
|
||||
partial_success, real_download = dl(fname, new_info)
|
||||
@@ -2437,9 +2464,8 @@ def correct_ext(filename):
|
||||
else:
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
if (info_dict.get('protocol') == 'm3u8_native'
|
||||
or info_dict.get('protocol') == 'm3u8'
|
||||
and self.params.get('hls_prefer_native')):
|
||||
if ('protocol' in info_dict
|
||||
and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: malformed AAC bitstream detected.' % (
|
||||
info_dict['id']))
|
||||
@@ -2455,13 +2481,13 @@ def correct_ext(filename):
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
try:
|
||||
self.post_process(dl_filename, info_dict, files_to_move)
|
||||
info_dict = self.post_process(dl_filename, info_dict, files_to_move)
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Postprocessing: %s' % str(err))
|
||||
return
|
||||
try:
|
||||
for ph in self._post_hooks:
|
||||
ph(full_filename)
|
||||
ph(info_dict['filepath'])
|
||||
except Exception as err:
|
||||
self.report_error('post hooks: %s' % str(err))
|
||||
return
|
||||
@@ -2501,7 +2527,7 @@ def download(self, url_list):
|
||||
else:
|
||||
if self.params.get('dump_single_json', False):
|
||||
self.post_extract(res)
|
||||
self.to_stdout(json.dumps(res))
|
||||
self.to_stdout(json.dumps(res, default=repr))
|
||||
|
||||
return self._download_retcode
|
||||
|
||||
@@ -2510,10 +2536,10 @@ def download_with_info_file(self, info_filename):
|
||||
[info_filename], mode='r',
|
||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||
# FileInput doesn't have a read method, we can't call json.load
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except DownloadError:
|
||||
except (DownloadError, EntryNotInPlaylist):
|
||||
webpage_url = info.get('webpage_url')
|
||||
if webpage_url is not None:
|
||||
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
||||
@@ -2523,21 +2549,32 @@ def download_with_info_file(self, info_filename):
|
||||
return self._download_retcode
|
||||
|
||||
@staticmethod
|
||||
def filter_requested_info(info_dict):
|
||||
fields_to_remove = ('requested_formats', 'requested_subtitles')
|
||||
return dict(
|
||||
(k, v) for k, v in info_dict.items()
|
||||
if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
|
||||
def filter_requested_info(info_dict, actually_filter=True):
|
||||
if not actually_filter:
|
||||
info_dict['epoch'] = int(time.time())
|
||||
return info_dict
|
||||
exceptions = {
|
||||
'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
|
||||
'keep': ['_type'],
|
||||
}
|
||||
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
|
||||
filter_fn = lambda obj: (
|
||||
list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
|
||||
else obj if not isinstance(obj, dict)
|
||||
else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
|
||||
return filter_fn(info_dict)
|
||||
|
||||
def run_pp(self, pp, infodict, files_to_move={}):
|
||||
def run_pp(self, pp, infodict):
|
||||
files_to_delete = []
|
||||
if '__files_to_move' not in infodict:
|
||||
infodict['__files_to_move'] = {}
|
||||
files_to_delete, infodict = pp.run(infodict)
|
||||
if not files_to_delete:
|
||||
return files_to_move, infodict
|
||||
return infodict
|
||||
|
||||
if self.params.get('keepvideo', False):
|
||||
for f in files_to_delete:
|
||||
files_to_move.setdefault(f, '')
|
||||
infodict['__files_to_move'].setdefault(f, '')
|
||||
else:
|
||||
for old_filename in set(files_to_delete):
|
||||
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
|
||||
@@ -2545,9 +2582,9 @@ def run_pp(self, pp, infodict, files_to_move={}):
|
||||
os.remove(encodeFilename(old_filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning('Unable to remove downloaded original file')
|
||||
if old_filename in files_to_move:
|
||||
del files_to_move[old_filename]
|
||||
return files_to_move, infodict
|
||||
if old_filename in infodict['__files_to_move']:
|
||||
del infodict['__files_to_move'][old_filename]
|
||||
return infodict
|
||||
|
||||
@staticmethod
|
||||
def post_extract(info_dict):
|
||||
@@ -2570,20 +2607,22 @@ def actual_post_extract(info_dict):
|
||||
def pre_process(self, ie_info):
|
||||
info = dict(ie_info)
|
||||
for pp in self._pps['beforedl']:
|
||||
info = self.run_pp(pp, info)[1]
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
|
||||
def post_process(self, filename, ie_info, files_to_move={}):
|
||||
def post_process(self, filename, ie_info, files_to_move=None):
|
||||
"""Run all the postprocessors on the given file."""
|
||||
info = dict(ie_info)
|
||||
info['filepath'] = filename
|
||||
info['__files_to_move'] = {}
|
||||
info['__files_to_move'] = files_to_move or {}
|
||||
|
||||
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
|
||||
files_to_move, info = self.run_pp(pp, info, files_to_move)
|
||||
info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
|
||||
info = self.run_pp(pp, info)
|
||||
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
|
||||
del info['__files_to_move']
|
||||
for pp in self._pps['aftermove']:
|
||||
info = self.run_pp(pp, info, {})[1]
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
video_id = info_dict.get('id')
|
||||
@@ -2632,12 +2671,11 @@ def format_resolution(format, default='unknown'):
|
||||
return 'audio only'
|
||||
if format.get('resolution') is not None:
|
||||
return format['resolution']
|
||||
if format.get('height') is not None:
|
||||
if format.get('width') is not None:
|
||||
res = '%sx%s' % (format['width'], format['height'])
|
||||
else:
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width') is not None:
|
||||
if format.get('width') and format.get('height'):
|
||||
res = '%dx%d' % (format['width'], format['height'])
|
||||
elif format.get('height'):
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width'):
|
||||
res = '%dx?' % format['width']
|
||||
else:
|
||||
res = default
|
||||
@@ -2951,7 +2989,7 @@ def _write_thumbnails(self, info_dict, filename): # return the extensions
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '%s.' % t['id'] if multiple else ''
|
||||
thumb_display_id = '%s ' % t['id'] if multiple else ''
|
||||
t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
|
||||
t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
|
||||
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
ret.append(suffix + thumb_ext)
|
||||
|
||||
@@ -180,6 +180,8 @@ def _real_main(argv=None):
|
||||
if opts.overwrites:
|
||||
# --yes-overwrites implies --no-continue
|
||||
opts.continue_dl = False
|
||||
if opts.concurrent_fragment_downloads <= 0:
|
||||
raise ValueError('Concurrent fragments must be positive')
|
||||
|
||||
def parse_retries(retries, name=''):
|
||||
if retries in ('inf', 'infinite'):
|
||||
@@ -277,9 +279,14 @@ def parse_retries(retries, name=''):
|
||||
|
||||
def report_conflict(arg1, arg2):
|
||||
write_string('WARNING: %s is ignored since %s was given\n' % (arg2, arg1), out=sys.stderr)
|
||||
|
||||
if opts.remuxvideo and opts.recodevideo:
|
||||
report_conflict('--recode-video', '--remux-video')
|
||||
opts.remuxvideo = False
|
||||
if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False:
|
||||
report_conflict('--split-chapter', '--sponskrub-cut')
|
||||
opts.sponskrub_cut = False
|
||||
|
||||
if opts.allow_unplayable_formats:
|
||||
if opts.extractaudio:
|
||||
report_conflict('--allow-unplayable-formats', '--extract-audio')
|
||||
@@ -369,11 +376,7 @@ def report_conflict(arg1, arg2):
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# XAttrMetadataPP should be run after post-processors that may change file
|
||||
# contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# This should be below all ffmpeg PP because it may cut parts out from the video
|
||||
# This should be below most ffmpeg PP because it may cut parts out from the video
|
||||
# If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
|
||||
if opts.sponskrub is not False:
|
||||
postprocessors.append({
|
||||
@@ -384,6 +387,11 @@ def report_conflict(arg1, arg2):
|
||||
'force': opts.sponskrub_force,
|
||||
'ignoreerror': opts.sponskrub is None,
|
||||
})
|
||||
if opts.split_chapters:
|
||||
postprocessors.append({'key': 'FFmpegSplitChapters'})
|
||||
# XAttrMetadataPP should be run after post-processors that may change file contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# ExecAfterDownload must be the last PP
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
@@ -463,6 +471,7 @@ def report_args_compat(arg, name):
|
||||
'extractor_retries': opts.extractor_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'http_chunk_size': opts.http_chunk_size,
|
||||
@@ -482,6 +491,7 @@ def report_args_compat(arg, name):
|
||||
'writeannotations': opts.writeannotations,
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'allow_playlist_files': opts.allow_playlist_files,
|
||||
'clean_infojson': opts.clean_infojson,
|
||||
'getcomments': opts.getcomments,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'write_all_thumbnails': opts.write_all_thumbnails,
|
||||
|
||||
@@ -326,6 +326,12 @@ def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen('[download] Unable to resume')
|
||||
|
||||
@staticmethod
|
||||
def supports_manifest(manifest):
|
||||
""" Whether the downloader can download the fragments from the manifest.
|
||||
Redefine in subclasses if needed. """
|
||||
pass
|
||||
|
||||
def download(self, filename, info_dict, subtitle=False):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
|
||||
@@ -1,18 +1,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
import concurrent.futures
|
||||
can_threaded_download = True
|
||||
except ImportError:
|
||||
can_threaded_download = False
|
||||
|
||||
from ..downloader import _get_real_downloader
|
||||
from .fragment import FragmentFD
|
||||
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
sanitize_open,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a DASH manifest
|
||||
Download segments in a DASH manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol
|
||||
"""
|
||||
|
||||
FD_NAME = 'dashsegments'
|
||||
@@ -37,7 +45,7 @@ def real_download(self, filename, info_dict):
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
fragment_urls = []
|
||||
fragments_to_download = []
|
||||
frag_index = 0
|
||||
for i, fragment in enumerate(fragments):
|
||||
frag_index += 1
|
||||
@@ -48,49 +56,17 @@ def real_download(self, filename, info_dict):
|
||||
assert fragment_base_url
|
||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||
|
||||
if real_downloader:
|
||||
fragment_urls.append(fragment_url)
|
||||
continue
|
||||
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
return False
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
fragments_to_download.append({
|
||||
'frag_index': frag_index,
|
||||
'index': i,
|
||||
'url': fragment_url,
|
||||
})
|
||||
|
||||
if real_downloader:
|
||||
self.to_screen(
|
||||
'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
|
||||
info_copy = info_dict.copy()
|
||||
info_copy['url_list'] = fragment_urls
|
||||
info_copy['fragments'] = fragments_to_download
|
||||
fd = real_downloader(self.ydl, self.params)
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
@@ -99,5 +75,104 @@ def real_download(self, filename, info_dict):
|
||||
if not success:
|
||||
return False
|
||||
else:
|
||||
def download_fragment(fragment):
|
||||
i = fragment['index']
|
||||
frag_index = fragment['frag_index']
|
||||
fragment_url = fragment['url']
|
||||
|
||||
ctx['fragment_index'] = frag_index
|
||||
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
return False, frag_index
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
|
||||
return frag_content, frag_index
|
||||
|
||||
def append_fragment(frag_content, frag_index):
|
||||
if frag_content:
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
try:
|
||||
file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
file.close()
|
||||
self._append_fragment(ctx, frag_content)
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
else:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
|
||||
max_workers = self.params.get('concurrent_fragment_downloads', 1)
|
||||
if can_threaded_download and max_workers > 1:
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]
|
||||
# timeout must be 0 to return instantly
|
||||
done, not_done = concurrent.futures.wait(futures, timeout=0)
|
||||
try:
|
||||
while not_done:
|
||||
# Check every 1 second for KeyboardInterrupt
|
||||
freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
|
||||
done |= freshly_done
|
||||
except KeyboardInterrupt:
|
||||
for future in not_done:
|
||||
future.cancel()
|
||||
# timeout must be none to cancel
|
||||
concurrent.futures.wait(not_done, timeout=None)
|
||||
raise KeyboardInterrupt
|
||||
results = [future.result() for future in futures]
|
||||
|
||||
for frag_content, frag_index in results:
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments_to_download:
|
||||
frag_content, frag_index = download_fragment(fragment)
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
return True
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
cli_bool_option,
|
||||
cli_configuration_args,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
@@ -108,7 +107,8 @@ def _valueless_option(self, command_option, param, expected_value=True):
|
||||
def _configuration_args(self, *args, **kwargs):
|
||||
return cli_configuration_args(
|
||||
self.params.get('external_downloader_args'),
|
||||
self.get_basename(), *args, **kwargs)
|
||||
[self.get_basename(), 'default'],
|
||||
*args, **kwargs)
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
@@ -116,24 +116,43 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
if 'fragments' in info_dict:
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode == 0:
|
||||
break
|
||||
# TODO: Decide whether to retry based on error code
|
||||
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.to_screen(
|
||||
'[%s] Got error. Retrying fragments (attempt %d of %s)...'
|
||||
% (self.get_basename(), count, self.format_retries(fragment_retries)))
|
||||
if count > fragment_retries:
|
||||
if not skip_unavailable_fragments:
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return -1
|
||||
|
||||
if 'url_list' in info_dict:
|
||||
file_list = []
|
||||
for [i, url] in enumerate(info_dict['url_list']):
|
||||
tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
|
||||
file_list.append(tmpsegmentname)
|
||||
key_list = info_dict.get('key_list')
|
||||
decrypt_info = None
|
||||
dest, _ = sanitize_open(tmpfilename, 'wb')
|
||||
for i, file in enumerate(file_list):
|
||||
src, _ = sanitize_open(file, 'rb')
|
||||
if key_list:
|
||||
decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info)
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
|
||||
try:
|
||||
src, _ = sanitize_open(fragment_filename, 'rb')
|
||||
except IOError:
|
||||
if skip_unavailable_fragments and frag_index > 1:
|
||||
self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
|
||||
continue
|
||||
self.report_error('Unable to open fragment %d' % frag_index)
|
||||
return -1
|
||||
decrypt_info = fragment.get('decrypt_info')
|
||||
if decrypt_info:
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV')
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
@@ -149,19 +168,16 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
fragment_data = src.read()
|
||||
dest.write(fragment_data)
|
||||
src.close()
|
||||
if not self.params.get('keep_fragments', False):
|
||||
os.remove(encodeFilename(fragment_filename))
|
||||
dest.close()
|
||||
if not self.params.get('keep_fragments', False):
|
||||
for file_path in file_list:
|
||||
try:
|
||||
os.remove(file_path)
|
||||
except OSError as ose:
|
||||
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
|
||||
try:
|
||||
file_path = '%s.frag.urls' % tmpfilename
|
||||
os.remove(file_path)
|
||||
except OSError as ose:
|
||||
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
|
||||
|
||||
os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
|
||||
else:
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
return p.returncode
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
@@ -245,15 +261,22 @@ class Aria2cFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-v'
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
|
||||
|
||||
@staticmethod
|
||||
def supports_manifest(manifest):
|
||||
UNSUPPORTED_FEATURES = [
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
]
|
||||
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
|
||||
return all(check_results)
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-c']
|
||||
dn = os.path.dirname(tmpfilename)
|
||||
if 'url_list' not in info_dict:
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
|
||||
cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
|
||||
if dn:
|
||||
cmd += ['--dir', dn]
|
||||
cmd = [self.exe, '-c',
|
||||
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
|
||||
'--file-allocation=none', '-x16', '-j16', '-s16']
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
|
||||
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
@@ -261,19 +284,25 @@ def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||
cmd += self._configuration_args()
|
||||
|
||||
dn = os.path.dirname(tmpfilename)
|
||||
if dn:
|
||||
cmd += ['--dir', dn]
|
||||
if 'fragments' not in info_dict:
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
cmd += ['--auto-file-renaming=false']
|
||||
if 'url_list' in info_dict:
|
||||
cmd += verbose_level_args
|
||||
cmd += ['--uri-selector', 'inorder', '--download-result=hide']
|
||||
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||
url_list_file = '%s.frag.urls' % tmpfilename
|
||||
url_list = []
|
||||
for [i, url] in enumerate(info_dict['url_list']):
|
||||
tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
|
||||
url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||
url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
|
||||
stream, _ = sanitize_open(url_list_file, 'wb')
|
||||
stream.write('\n'.join(url_list).encode('utf-8'))
|
||||
stream.close()
|
||||
|
||||
cmd += ['-i', url_list_file]
|
||||
else:
|
||||
cmd += ['--', info_dict['url']]
|
||||
@@ -282,8 +311,8 @@ def _make_cmd(self, tmpfilename, info_dict):
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return check_executable('http', ['--version'])
|
||||
def available(cls, path=None):
|
||||
return check_executable(path or 'http', ['--version'])
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
@@ -298,7 +327,7 @@ class FFmpegFD(ExternalFD):
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
||||
|
||||
@classmethod
|
||||
def available(cls):
|
||||
def available(cls, path=None): # path is ignored for ffmpeg
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
|
||||
@@ -7,6 +7,11 @@
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
try:
|
||||
import concurrent.futures
|
||||
can_threaded_download = True
|
||||
except ImportError:
|
||||
can_threaded_download = False
|
||||
|
||||
from ..downloader import _get_real_downloader
|
||||
from .fragment import FragmentFD
|
||||
@@ -19,12 +24,17 @@
|
||||
)
|
||||
from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
sanitize_open,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class HlsFD(FragmentFD):
|
||||
""" A limited implementation that does not require ffmpeg """
|
||||
"""
|
||||
Download segments in a m3u8 manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol and
|
||||
re-defining 'supports_manifest' function
|
||||
"""
|
||||
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@@ -53,12 +63,15 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypt
|
||||
UNSUPPORTED_FEATURES += [
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
]
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
check_results.append(with_crypto or not is_aes128_enc)
|
||||
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
def check_results():
|
||||
yield not info_dict.get('is_live')
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
yield with_crypto or not is_aes128_enc
|
||||
yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
|
||||
for feature in UNSUPPORTED_FEATURES:
|
||||
yield not re.search(feature, manifest)
|
||||
return all(check_results())
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
@@ -70,20 +83,24 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
|
||||
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
|
||||
self.report_error('pycryptodome not found. Please install it.')
|
||||
self.report_error('pycryptodome not found. Please install')
|
||||
return False
|
||||
if self.can_download(s, info_dict, with_crypto=True):
|
||||
self.report_warning('pycryptodome is needed to download this file with hlsnative')
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
self.report_warning('pycryptodome is needed to download this file natively')
|
||||
fd = FFmpegFD(self.ydl, self.params)
|
||||
self.report_warning(
|
||||
'%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
||||
if real_downloader and not real_downloader.supports_manifest(s):
|
||||
real_downloader = None
|
||||
if real_downloader:
|
||||
self.to_screen(
|
||||
'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
|
||||
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
@@ -93,7 +110,7 @@ def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
|
||||
fragment_urls = []
|
||||
fragments = []
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
@@ -136,14 +153,12 @@ def is_ad_fragment_end(s):
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
key_list = []
|
||||
byte_range = {}
|
||||
discontinuity_count = 0
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
download_frag = False
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
@@ -160,17 +175,20 @@ def is_ad_fragment_end(s):
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
|
||||
if real_downloader:
|
||||
fragment_urls.append(frag_url)
|
||||
continue
|
||||
download_frag = True
|
||||
fragments.append({
|
||||
'frag_index': frag_index,
|
||||
'url': frag_url,
|
||||
'decrypt_info': decrypt_info,
|
||||
'byte_range': byte_range,
|
||||
'media_sequence': media_sequence,
|
||||
})
|
||||
|
||||
elif line.startswith('#EXT-X-MAP'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
continue
|
||||
if frag_index > 0:
|
||||
self.report_error(
|
||||
'initialization fragment found after media fragments, unable to download')
|
||||
'Initialization fragment found after media fragments, unable to download')
|
||||
return False
|
||||
frag_index += 1
|
||||
map_info = parse_m3u8_attributes(line[11:])
|
||||
@@ -180,9 +198,14 @@ def is_ad_fragment_end(s):
|
||||
else compat_urlparse.urljoin(man_url, map_info.get('URI')))
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if real_downloader:
|
||||
fragment_urls.append(frag_url)
|
||||
continue
|
||||
|
||||
fragments.append({
|
||||
'frag_index': frag_index,
|
||||
'url': frag_url,
|
||||
'decrypt_info': decrypt_info,
|
||||
'byte_range': byte_range,
|
||||
'media_sequence': media_sequence
|
||||
})
|
||||
|
||||
if map_info.get('BYTERANGE'):
|
||||
splitted_byte_range = map_info.get('BYTERANGE').split('@')
|
||||
@@ -191,7 +214,6 @@ def is_ad_fragment_end(s):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
download_frag = True
|
||||
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_url = decrypt_info.get('URI')
|
||||
@@ -206,9 +228,6 @@ def is_ad_fragment_end(s):
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
key_data = decrypt_info.copy()
|
||||
key_data['INDEX'] = frag_index
|
||||
key_list.append(key_data)
|
||||
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
@@ -225,58 +244,16 @@ def is_ad_fragment_end(s):
|
||||
ad_frag_next = False
|
||||
elif line.startswith('#EXT-X-DISCONTINUITY'):
|
||||
discontinuity_count += 1
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
|
||||
if download_frag:
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if not test:
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
fragments = [fragments[0] if fragments else None]
|
||||
|
||||
if real_downloader:
|
||||
info_copy = info_dict.copy()
|
||||
info_copy['url_list'] = fragment_urls
|
||||
info_copy['key_list'] = key_list
|
||||
info_copy['fragments'] = fragments
|
||||
fd = real_downloader(self.ydl, self.params)
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
@@ -285,5 +262,107 @@ def is_ad_fragment_end(s):
|
||||
if not success:
|
||||
return False
|
||||
else:
|
||||
def download_fragment(fragment):
|
||||
frag_index = fragment['frag_index']
|
||||
frag_url = fragment['url']
|
||||
decrypt_info = fragment['decrypt_info']
|
||||
byte_range = fragment['byte_range']
|
||||
media_sequence = fragment['media_sequence']
|
||||
|
||||
ctx['fragment_index'] = frag_index
|
||||
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if not test:
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
|
||||
return frag_content, frag_index
|
||||
|
||||
def append_fragment(frag_content, frag_index):
|
||||
if frag_content:
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
try:
|
||||
file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
file.close()
|
||||
self._append_fragment(ctx, frag_content)
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
else:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
|
||||
max_workers = self.params.get('concurrent_fragment_downloads', 1)
|
||||
if can_threaded_download and max_workers > 1:
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
futures = [pool.submit(download_fragment, fragment) for fragment in fragments]
|
||||
# timeout must be 0 to return instantly
|
||||
done, not_done = concurrent.futures.wait(futures, timeout=0)
|
||||
try:
|
||||
while not_done:
|
||||
# Check every 1 second for KeyboardInterrupt
|
||||
freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
|
||||
done |= freshly_done
|
||||
except KeyboardInterrupt:
|
||||
for future in not_done:
|
||||
future.cancel()
|
||||
# timeout must be none to cancel
|
||||
concurrent.futures.wait(not_done, timeout=None)
|
||||
raise KeyboardInterrupt
|
||||
results = [future.result() for future in futures]
|
||||
|
||||
for frag_content, frag_index in results:
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments:
|
||||
frag_content, frag_index = download_fragment(fragment)
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
return True
|
||||
|
||||
@@ -117,7 +117,7 @@ def run_rtmpdump(args):
|
||||
|
||||
# Check for rtmpdump first
|
||||
if not check_executable('rtmpdump', ['-h']):
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
|
||||
@@ -24,7 +24,7 @@ def real_download(self, filename, info_dict):
|
||||
args = [
|
||||
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
|
||||
else:
|
||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
|
||||
return False
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
@@ -79,8 +79,7 @@ def download_and_parse_fragment(url, frag_index, request_data):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
success, raw_fragment = dl_fragment(
|
||||
'https://www.youtube.com/watch?v={}'.format(video_id))
|
||||
success, raw_fragment = dl_fragment(info_dict['url'])
|
||||
if not success:
|
||||
return False
|
||||
try:
|
||||
|
||||
@@ -65,15 +65,35 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
def _real_extract(self, url):
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
properties = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
|
||||
display_id)['data']['properties']
|
||||
page_data = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
|
||||
% (requestor_id.lower(), display_id), display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
video_player_count = 0
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
releasePid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + releasePid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
if not video_player_count:
|
||||
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
@@ -90,30 +110,41 @@ def _real_extract(self, url):
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_urls = [properties.get('imageDesktop')]
|
||||
if 'thumbnail' in info:
|
||||
thumbnail_urls.append(info.pop('thumbnail'))
|
||||
for thumbnail_url in thumbnail_urls:
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int(mobj.group(1)) if mobj else None,
|
||||
'height': int(mobj.group(2)) if mobj else None,
|
||||
})
|
||||
|
||||
info.update({
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
series = theplatform_metadata.get(ns + '$show') or None
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'season_number': season_number,
|
||||
'series': series,
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -42,6 +42,7 @@ def _real_extract(self, url):
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode = ember_data['data']['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_timestamp': 1396483200,
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_timestamp': 1076112000,
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
@@ -197,7 +199,7 @@ def _real_extract(self, url):
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
@@ -811,7 +816,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
|
||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||
else super(BBCIE, cls).suitable(url))
|
||||
|
||||
@@ -1338,21 +1343,149 @@ def _real_extract(self, url):
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
|
||||
@staticmethod
|
||||
def _get_default(episode, key, default_key='default'):
|
||||
return try_get(episode, lambda x: x[key][default_key])
|
||||
|
||||
def _get_description(self, data):
|
||||
synopsis = data.get(self._DESCRIPTION_KEY) or {}
|
||||
return dict_get(synopsis, ('large', 'medium', 'small'))
|
||||
|
||||
def _fetch_page(self, programme_id, per_page, series_id, page):
|
||||
elements = self._get_elements(self._call_api(
|
||||
programme_id, per_page, page + 1, series_id))
|
||||
for element in elements:
|
||||
episode = self._get_episode(element)
|
||||
episode_id = episode.get('id')
|
||||
if not episode_id:
|
||||
continue
|
||||
thumbnail = None
|
||||
image = self._get_episode_image(episode)
|
||||
if image:
|
||||
thumbnail = image.replace('{recipe}', 'raw')
|
||||
category = self._get_default(episode, 'labels', 'category')
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': episode_id,
|
||||
'title': self._get_episode_field(episode, 'subtitle'),
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
|
||||
'thumbnail': thumbnail,
|
||||
'description': self._get_description(episode),
|
||||
'categories': [category] if category else None,
|
||||
'series': self._get_episode_field(episode, 'title'),
|
||||
'ie_key': BBCCoUkIE.ie_key(),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
pid = self._match_id(url)
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
series_id = qs.get('seriesId', [None])[0]
|
||||
page = qs.get('page', [None])[0]
|
||||
per_page = 36 if page else self._PAGE_SIZE
|
||||
fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
|
||||
entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
|
||||
playlist_data = self._get_playlist_data(self._call_api(pid, 1))
|
||||
return self.playlist_result(
|
||||
entries, pid, self._get_playlist_title(playlist_data),
|
||||
self._get_description(playlist_data))
|
||||
|
||||
|
||||
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:episodes'
|
||||
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
'title': 'The Disappearance',
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
'playlist_mincount': 8,
|
||||
}, {
|
||||
# all seasons
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
|
||||
'info_dict': {
|
||||
'id': 'b094m5t9',
|
||||
'title': 'Doctor Foster',
|
||||
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
# explicit season
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
|
||||
'info_dict': {
|
||||
'id': 'b094m5t9',
|
||||
'title': 'Doctor Foster',
|
||||
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
# all pages
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
|
||||
'info_dict': {
|
||||
'id': 'm0004c4v',
|
||||
'title': 'Beechgrove',
|
||||
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
}, {
|
||||
# explicit page
|
||||
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
|
||||
'info_dict': {
|
||||
'id': 'm0004c4v',
|
||||
'title': 'Beechgrove',
|
||||
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
_DESCRIPTION_KEY = 'synopsis'
|
||||
|
||||
def _get_episode_image(self, episode):
|
||||
return self._get_default(episode, 'image')
|
||||
|
||||
def _get_episode_field(self, episode, field):
|
||||
return self._get_default(episode, field)
|
||||
|
||||
@staticmethod
|
||||
def _get_elements(data):
|
||||
return data['entities']['results']
|
||||
|
||||
@staticmethod
|
||||
def _get_episode(element):
|
||||
return element.get('episode') or {}
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
variables = {
|
||||
'id': pid,
|
||||
'page': page,
|
||||
'perPage': per_page,
|
||||
}
|
||||
if series_id:
|
||||
variables['sliceId'] = series_id
|
||||
return self._download_json(
|
||||
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
|
||||
'Content-Type': 'application/json'
|
||||
}, data=json.dumps({
|
||||
'id': '5692d93d5aac8d796a0305e895e61551',
|
||||
'variables': variables,
|
||||
}).encode('utf-8'))['data']['programme']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
return data
|
||||
|
||||
def _get_playlist_title(self, data):
|
||||
return self._get_default(data, 'title')
|
||||
|
||||
|
||||
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:group'
|
||||
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
|
||||
_TESTS = [{
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
@@ -1361,14 +1494,56 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
# all pages
|
||||
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
|
||||
'info_dict': {
|
||||
'id': 'p081d7j7',
|
||||
'title': 'Music in Scotland',
|
||||
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||
},
|
||||
'playlist_mincount': 47,
|
||||
}, {
|
||||
# explicit page
|
||||
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
|
||||
'info_dict': {
|
||||
'id': 'p081d7j7',
|
||||
'title': 'Music in Scotland',
|
||||
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}]
|
||||
_PAGE_SIZE = 200
|
||||
_DESCRIPTION_KEY = 'synopses'
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
|
||||
webpage, 'description', fatal=False, group='value')
|
||||
return title, description
|
||||
def _get_episode_image(self, episode):
|
||||
return self._get_default(episode, 'images', 'standard')
|
||||
|
||||
def _get_episode_field(self, episode, field):
|
||||
return episode.get(field)
|
||||
|
||||
@staticmethod
|
||||
def _get_elements(data):
|
||||
return data['elements']
|
||||
|
||||
@staticmethod
|
||||
def _get_episode(element):
|
||||
return element
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
return self._download_json(
|
||||
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||
pid, query={
|
||||
'page': page,
|
||||
'per_page': per_page,
|
||||
})['group_episodes']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
return data['group']
|
||||
|
||||
def _get_playlist_title(self, data):
|
||||
return data.get('title')
|
||||
|
||||
|
||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -15,6 +16,7 @@
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
@@ -113,6 +115,13 @@ class BiliBiliIE(InfoExtractor):
|
||||
# new BV video id format
|
||||
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Anthology
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
@@ -139,9 +148,19 @@ def _real_extract(self, url):
|
||||
page_id = mobj.group('page')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
# If the video has no page argument, check to see if it's an anthology
|
||||
if page_id is None:
|
||||
if not self._downloader.params.get('noplaylist'):
|
||||
r = self._extract_anthology_entries(bv_id, video_id, webpage)
|
||||
if r is not None:
|
||||
self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
|
||||
return r
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
|
||||
r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid',
|
||||
default=None
|
||||
) or self._search_regex(
|
||||
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
@@ -170,6 +189,7 @@ def _real_extract(self, url):
|
||||
cid = js['result']['cid']
|
||||
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
@@ -223,7 +243,18 @@ def _real_extract(self, url):
|
||||
title = self._html_search_regex(
|
||||
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title') + ('_p' + str(page_id) if page_id is not None else '')
|
||||
group='title')
|
||||
|
||||
# Get part title for anthologies
|
||||
if page_id is not None:
|
||||
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
|
||||
part_title = try_get(
|
||||
self._download_json(
|
||||
"https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
|
||||
video_id, note='Extracting videos in anthology'),
|
||||
lambda x: x['data'][int(page_id) - 1]['part'])
|
||||
title = part_title or title
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
||||
@@ -233,7 +264,7 @@ def _real_extract(self, url):
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
|
||||
'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
|
||||
'cid': cid,
|
||||
'title': title,
|
||||
'description': description,
|
||||
@@ -299,7 +330,7 @@ def get_comments():
|
||||
|
||||
global_info = {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'id': compat_str(video_id),
|
||||
'bv_id': bv_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
@@ -311,6 +342,20 @@ def get_comments():
|
||||
|
||||
return global_info
|
||||
|
||||
def _extract_anthology_entries(self, bv_id, video_id, webpage):
|
||||
title = self._html_search_regex(
|
||||
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title')
|
||||
json_data = self._download_json(
|
||||
"https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
|
||||
video_id, note='Extracting videos in anthology')
|
||||
|
||||
if len(json_data['data']) > 1:
|
||||
return self.playlist_from_matches(
|
||||
json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
|
||||
getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))
|
||||
|
||||
def _get_video_id_set(self, id, is_bv):
|
||||
query = {'bvid': id} if is_bv else {'aid': id}
|
||||
response = self._download_json(
|
||||
@@ -505,7 +550,7 @@ def _get_n_results(self, query, n):
|
||||
|
||||
videos = data['result']
|
||||
for video in videos:
|
||||
e = self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
|
||||
e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
|
||||
entries.append(e)
|
||||
|
||||
if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
|
||||
|
||||
@@ -27,10 +27,10 @@ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'ext': 'mp4',
|
||||
@@ -52,16 +52,19 @@ class CBSIE(CBSBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
'https://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': site, 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||
tp_release_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
|
||||
@@ -231,8 +231,9 @@ class InfoExtractor(object):
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
release_timestamp: UNIX timestamp of the moment the video was released.
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
@@ -251,8 +252,8 @@ class InfoExtractor(object):
|
||||
* "data": The subtitles file contents
|
||||
* "url": A URL pointing to the subtitles file
|
||||
"ext" will be calculated from URL if missing
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
automatic_captions: Like 'subtitles'; contains automatically generated
|
||||
captions instead of normal subtitles
|
||||
duration: Length of the video in seconds, as an integer or float.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
@@ -264,6 +265,7 @@ class InfoExtractor(object):
|
||||
properties (all but one of text or html optional):
|
||||
* "author" - human-readable name of the comment author
|
||||
* "author_id" - user ID of the comment author
|
||||
* "author_thumbnail" - The thumbnail of the comment author
|
||||
* "id" - Comment ID
|
||||
* "html" - Comment as HTML
|
||||
* "text" - Plain text of the comment
|
||||
@@ -271,6 +273,12 @@ class InfoExtractor(object):
|
||||
* "parent" - ID of the comment this one is replying to.
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
* "like_count" - Number of positive ratings of the comment
|
||||
* "dislike_count" - Number of negative ratings of the comment
|
||||
* "is_favorited" - Whether the comment is marked as
|
||||
favorite by the video uploader
|
||||
* "author_is_uploader" - Whether the comment is made by
|
||||
the video uploader
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The URL to the video webpage, if given to yt-dlp it
|
||||
should allow to get the same result again. (It will be set
|
||||
@@ -293,7 +301,11 @@ class InfoExtractor(object):
|
||||
playable_in_embed: Whether this video is allowed to play in embedded
|
||||
players on other sites. Can be True (=always allowed),
|
||||
False (=never allowed), None (=unknown), or a string
|
||||
specifying the criteria for embedability (Eg: 'whitelist').
|
||||
specifying the criteria for embedability (Eg: 'whitelist')
|
||||
availability: Under what condition the video is available. One of
|
||||
'private', 'premium_only', 'subscriber_only', 'needs_auth',
|
||||
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
||||
to set it
|
||||
__post_extractor: A function to be called just before the metadata is
|
||||
written to either disk, logger or console. The function
|
||||
must return a dict which will be added to the info_dict.
|
||||
@@ -1409,8 +1421,8 @@ class FormatSort:
|
||||
'ie_pref': {'priority': True, 'type': 'extractor'},
|
||||
'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
|
||||
'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
|
||||
'lang': {'priority': True, 'convert': 'ignore', 'type': 'extractor', 'field': 'language_preference'},
|
||||
'quality': {'convert': 'float_none', 'type': 'extractor'},
|
||||
'lang': {'priority': True, 'convert': 'ignore', 'field': 'language_preference'},
|
||||
'quality': {'convert': 'float_none'},
|
||||
'filesize': {'convert': 'bytes'},
|
||||
'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
|
||||
'id': {'convert': 'string', 'field': 'format_id'},
|
||||
@@ -1421,7 +1433,7 @@ class FormatSort:
|
||||
'vbr': {'convert': 'float_none'},
|
||||
'abr': {'convert': 'float_none'},
|
||||
'asr': {'convert': 'float_none'},
|
||||
'source': {'convert': 'ignore', 'type': 'extractor', 'field': 'source_preference'},
|
||||
'source': {'convert': 'ignore', 'field': 'source_preference'},
|
||||
|
||||
'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
|
||||
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
|
||||
@@ -1849,8 +1861,9 @@ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None, quality=None,
|
||||
m3u8_id=None, live=False, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False, data=None, headers={},
|
||||
query={}):
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
@@ -2050,11 +2063,11 @@ def build_stream_name():
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
for format in playlist_formats:
|
||||
for frmt in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = format.get('index')
|
||||
format_index = frmt.get('index')
|
||||
stream_name = build_stream_name()
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
@@ -2109,6 +2122,8 @@ def build_stream_name():
|
||||
# TODO: update acodec for audio only formats with
|
||||
# the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
if not f.get('ext'):
|
||||
f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4'
|
||||
formats.append(f)
|
||||
|
||||
# for DailyMotion
|
||||
@@ -3321,6 +3336,20 @@ def _generic_id(self, url):
|
||||
def _generic_title(self, url):
|
||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
|
||||
@staticmethod
|
||||
def _availability(is_private, needs_premium, needs_subscription, needs_auth, is_unlisted):
|
||||
all_known = all(map(
|
||||
lambda x: x is not None,
|
||||
(is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
|
||||
return (
|
||||
'private' if is_private
|
||||
else 'premium_only' if needs_premium
|
||||
else 'subscriber_only' if needs_subscription
|
||||
else 'needs_auth' if needs_auth
|
||||
else 'unlisted' if is_unlisted
|
||||
else 'public' if all_known
|
||||
else None)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -108,7 +108,8 @@
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
BBCCoUkIPlayerPlaylistIE,
|
||||
BBCCoUkIPlayerEpisodesIE,
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
@@ -450,10 +451,7 @@
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .gedi import (
|
||||
GediIE,
|
||||
GediEmbedsIE,
|
||||
)
|
||||
from .gedidigital import GediDigitalIE
|
||||
from .generic import GenericIE
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
@@ -711,7 +709,10 @@
|
||||
MixcloudUserIE,
|
||||
MixcloudPlaylistIE,
|
||||
)
|
||||
from .mlb import MLBIE
|
||||
from .mlb import (
|
||||
MLBIE,
|
||||
MLBVideoIE,
|
||||
)
|
||||
from .mnet import MnetIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import (
|
||||
@@ -735,6 +736,8 @@
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
MTVJapanIE,
|
||||
MTVItaliaIE,
|
||||
MTVItaliaProgrammaIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
@@ -823,7 +826,7 @@
|
||||
NickNightIE,
|
||||
NickRuIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE, NiconicoUserIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
@@ -954,6 +957,7 @@
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .plutotv import PlutoTVIE
|
||||
from .pluralsight import (
|
||||
PluralsightIE,
|
||||
PluralsightCourseIE,
|
||||
@@ -1179,7 +1183,10 @@
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
StitcherShowIE,
|
||||
)
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
@@ -1560,6 +1567,7 @@
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimtv import WimTVIE
|
||||
from .wistia import (
|
||||
WistiaIE,
|
||||
WistiaPlaylistIE,
|
||||
@@ -1666,8 +1674,14 @@
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zee5 import (
|
||||
Zee5IE,
|
||||
Zee5SeriesIE,
|
||||
)
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zee5 import Zee5IE
|
||||
from .zingmp3 import (
|
||||
ZingMp3IE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
|
||||
for f in formats:
|
||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||
if wh:
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
base_url,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class GediBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _clean_audio_fmts(formats):
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if 'acodec' in f:
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_data = re.findall(
|
||||
r'PlayerFactory\.setParam\(\'(?P<type>.+?)\',\s*\'(?P<name>.+?)\',\s*\'(?P<val>.+?)\'\);',
|
||||
webpage)
|
||||
|
||||
formats = []
|
||||
audio_fmts = []
|
||||
hls_fmts = []
|
||||
http_fmts = []
|
||||
title = ''
|
||||
thumb = ''
|
||||
|
||||
fmt_reg = r'(?P<t>video|audio)-(?P<p>rrtv|hls)-(?P<h>[\w\d]+)(?:-(?P<br>[\w\d]+))?$'
|
||||
br_reg = r'video-rrtv-(?P<br>\d+)-'
|
||||
|
||||
for t, n, v in player_data:
|
||||
if t == 'format':
|
||||
m = re.match(fmt_reg, n)
|
||||
if m:
|
||||
# audio formats
|
||||
if m.group('t') == 'audio':
|
||||
if m.group('p') == 'hls':
|
||||
audio_fmts.extend(self._extract_m3u8_formats(
|
||||
v, video_id, 'm4a', m3u8_id='hls', fatal=False))
|
||||
elif m.group('p') == 'rrtv':
|
||||
audio_fmts.append({
|
||||
'format_id': 'mp3',
|
||||
'url': v,
|
||||
'tbr': 128,
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3',
|
||||
})
|
||||
|
||||
# video formats
|
||||
elif m.group('t') == 'video':
|
||||
# hls manifest video
|
||||
if m.group('p') == 'hls':
|
||||
hls_fmts.extend(self._extract_m3u8_formats(
|
||||
v, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
# direct mp4 video
|
||||
elif m.group('p') == 'rrtv':
|
||||
if not m.group('br'):
|
||||
mm = re.search(br_reg, v)
|
||||
http_fmts.append({
|
||||
'format_id': 'https-' + m.group('h'),
|
||||
'protocol': 'https',
|
||||
'url': v,
|
||||
'tbr': int(m.group('br')) if m.group('br') else
|
||||
(int(mm.group('br')) if mm.group('br') else 0),
|
||||
'height': int(m.group('h'))
|
||||
})
|
||||
|
||||
elif t == 'param':
|
||||
if n == 'videotitle':
|
||||
title = v
|
||||
if n == 'image_full_play':
|
||||
thumb = v
|
||||
|
||||
title = self._og_search_title(webpage) if title == '' else title
|
||||
|
||||
# clean weird char
|
||||
title = compat_str(title).encode('utf8', 'replace').replace(b'\xc3\x82', b'').decode('utf8', 'replace')
|
||||
|
||||
if audio_fmts:
|
||||
self._clean_audio_fmts(audio_fmts)
|
||||
self._sort_formats(audio_fmts)
|
||||
if hls_fmts:
|
||||
self._sort_formats(hls_fmts)
|
||||
if http_fmts:
|
||||
self._sort_formats(http_fmts)
|
||||
|
||||
formats.extend(audio_fmts)
|
||||
formats.extend(hls_fmts)
|
||||
formats.extend(http_fmts)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('twitter:description', webpage),
|
||||
'thumbnail': thumb,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class GediIE(GediBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://video\.
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|huffingtonpost
|
||||
|ilsecoloxix
|
||||
|iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)
|
||||
(?:\.gelocal)?\.it/(?!embed/).+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
|
||||
'md5': '84658d7fb9e55a6e57ecc77b73137494',
|
||||
'info_dict': {
|
||||
'id': '121559/121683',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
|
||||
'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
|
||||
'md5': 'e763b94b7920799a0e0e23ffefa2d157',
|
||||
'info_dict': {
|
||||
'id': '367415/367963',
|
||||
'ext': 'mp4',
|
||||
'title': 'Record della pista a Spa Francorchamps, la Pagani Huayra Roadster BC stupisce',
|
||||
'description': 'md5:5deb503cefe734a3eb3f07ed74303920',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
|
||||
'md5': 'e48108e97b1af137d22a8469f2019057',
|
||||
'info_dict': {
|
||||
'id': '66184/66267',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cassani e i brividi azzurri ai Mondiali di Imola: \\"Qui mi sono innamorato del ciclismo da ragazzino, incredibile tornarci da ct\\"',
|
||||
'description': 'md5:fc9c50894f70a2469bb9b54d3d0a3d3b',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
|
||||
'md5': 'a6e39f3bdc1842bbd92abbbbef230817',
|
||||
'info_dict': {
|
||||
'id': '141059/142723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dentro la notizia - Ferrari, cosa succede a Maranello',
|
||||
'description': 'md5:9907d65b53765681fa3a0b3122617c1f',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class GediEmbedsIE(GediBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://video\.
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|huffingtonpost
|
||||
|ilsecoloxix
|
||||
|iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)
|
||||
(?:\.gelocal)?\.it/embed/.+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
|
||||
'md5': 'f4ac23cadfea7fef89bea536583fa7ed',
|
||||
'info_dict': {
|
||||
'id': '29312/29276',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cotticelli: \\"Non so cosa mi sia successo. Sto cercando di capire se ho avuto un malore\\"',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
|
||||
'md5': '0391c2c83c6506581003aaf0255889c0',
|
||||
'info_dict': {
|
||||
'id': '14772/14870',
|
||||
'ext': 'mp4',
|
||||
'title': 'Festival EMERGENCY, Villa: «La buona informazione aiuta la salute» (14772-14870)',
|
||||
'description': 'md5:2bce954d278248f3c950be355b7c2226',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_urls(urls):
|
||||
# add protocol if missing
|
||||
for i, e in enumerate(urls):
|
||||
if e.startswith('//'):
|
||||
urls[i] = 'https:%s' % e
|
||||
# clean iframes urls
|
||||
for i, e in enumerate(urls):
|
||||
urls[i] = urljoin(base_url(e), url_basename(e))
|
||||
return urls
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
entries = [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(r'''(?x)
|
||||
(?:
|
||||
data-frame-src=|
|
||||
<iframe[^\n]+src=
|
||||
)
|
||||
(["'])
|
||||
(?P<url>https?://video\.
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|huffingtonpost
|
||||
|ilsecoloxix
|
||||
|iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)
|
||||
(?:\.gelocal)?\.it/embed/.+?)
|
||||
\1''', webpage)]
|
||||
return GediEmbedsIE._sanitize_urls(entries)
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = GediEmbedsIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
210
yt_dlp/extractor/gedidigital.py
Normal file
210
yt_dlp/extractor/gedidigital.py
Normal file
@@ -0,0 +1,210 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
base_url,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class GediDigitalIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\.
|
||||
(?:
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|ilsecoloxix
|
||||
|huffingtonpost
|
||||
)|
|
||||
(?:
|
||||
iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)\.gelocal
|
||||
)\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
|
||||
'md5': '84658d7fb9e55a6e57ecc77b73137494',
|
||||
'info_dict': {
|
||||
'id': '121683',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
|
||||
'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_urls(urls):
|
||||
# add protocol if missing
|
||||
for i, e in enumerate(urls):
|
||||
if e.startswith('//'):
|
||||
urls[i] = 'https:%s' % e
|
||||
# clean iframes urls
|
||||
for i, e in enumerate(urls):
|
||||
urls[i] = urljoin(base_url(e), url_basename(e))
|
||||
return urls
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
entries = [
|
||||
mobj.group('eurl')
|
||||
for mobj in re.finditer(r'''(?x)
|
||||
(?:
|
||||
data-frame-src=|
|
||||
<iframe[^\n]+src=
|
||||
)
|
||||
(["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
|
||||
return GediDigitalIE._sanitize_urls(entries)
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = GediDigitalIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _clean_formats(formats):
|
||||
format_urls = set()
|
||||
clean_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
if f.get('audio_ext') != 'none' and not f.get('acodec'):
|
||||
continue
|
||||
format_urls.add(f['url'])
|
||||
clean_formats.append(f)
|
||||
formats[:] = clean_formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = re.match(self._VALID_URL, url).group('url')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta(
|
||||
['twitter:title', 'og:title'], webpage, fatal=True)
|
||||
player_data = re.findall(
|
||||
r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
|
||||
webpage)
|
||||
|
||||
formats = []
|
||||
duration = thumb = None
|
||||
for t, n, v in player_data:
|
||||
if t == 'format':
|
||||
if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
|
||||
continue
|
||||
elif n.endswith('-vod-ak'):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
v, video_id, {'http': 'media.gedidigital.it'}))
|
||||
else:
|
||||
ext = determine_ext(v)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': n,
|
||||
'url': v,
|
||||
}
|
||||
if ext == 'mp3':
|
||||
abr = int_or_none(self._search_regex(
|
||||
r'-mp3-audio-(\d+)', v, 'abr', default=None))
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
'acodec': ext,
|
||||
'vcodec': 'none'
|
||||
})
|
||||
else:
|
||||
mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(1)),
|
||||
'vbr': int_or_none(mobj.group(2)),
|
||||
})
|
||||
if not f.get('vbr'):
|
||||
f['vbr'] = int_or_none(self._search_regex(
|
||||
r'-video-rrtv-(\d+)', v, 'abr', default=None))
|
||||
formats.append(f)
|
||||
elif t == 'param':
|
||||
if n in ['image_full', 'image']:
|
||||
thumb = v
|
||||
elif n == 'videoDuration':
|
||||
duration = int_or_none(v)
|
||||
|
||||
self._clean_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['twitter:description', 'og:description', 'description'], webpage),
|
||||
'thumbnail': thumb or self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
}
|
||||
@@ -127,13 +127,14 @@
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .gedi import GediEmbedsIE
|
||||
from .gedidigital import GediDigitalIE
|
||||
from .rcs import RCSEmbedsIE
|
||||
from .bitchute import BitChuteIE
|
||||
from .rumble import RumbleEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .simplecast import SimplecastIE
|
||||
from .wimtv import WimTVIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2250,6 +2251,15 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 52,
|
||||
},
|
||||
{
|
||||
# WimTv embed player
|
||||
'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
|
||||
'info_dict': {
|
||||
'id': 'wearefmi-pt-2-2021',
|
||||
'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2955,7 +2965,7 @@ def _real_extract(self, url):
|
||||
webpage)
|
||||
if not mobj:
|
||||
mobj = re.search(
|
||||
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
|
||||
r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
@@ -3339,17 +3349,22 @@ def _real_extract(self, url):
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
# Look for RCS media group embeds
|
||||
gedi_urls = GediEmbedsIE._extract_urls(webpage)
|
||||
gedi_urls = GediDigitalIE._extract_urls(webpage)
|
||||
if gedi_urls:
|
||||
return self.playlist_from_matches(
|
||||
gedi_urls, video_id, video_title, ie=GediEmbedsIE.ie_key())
|
||||
gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
|
||||
|
||||
# Look for RCS media group embeds
|
||||
rcs_urls = RCSEmbedsIE._extract_urls(webpage)
|
||||
if rcs_urls:
|
||||
return self.playlist_from_matches(
|
||||
rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
|
||||
|
||||
wimtv_urls = WimTVIE._extract_urls(webpage)
|
||||
if wimtv_urls:
|
||||
return self.playlist_from_matches(
|
||||
wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
|
||||
|
||||
bitchute_urls = BitChuteIE._extract_urls(webpage)
|
||||
if bitchute_urls:
|
||||
return self.playlist_from_matches(
|
||||
|
||||
@@ -146,7 +146,7 @@ def _real_extract(self, url):
|
||||
expected=True)
|
||||
elif not pycryptodomex_found:
|
||||
raise ExtractorError(
|
||||
'pycryptodomex not found. Please install it.',
|
||||
'pycryptodomex not found. Please install',
|
||||
expected=True)
|
||||
elif message:
|
||||
extractor_msg += ': ' + message
|
||||
|
||||
@@ -6,8 +6,10 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -21,9 +23,9 @@
|
||||
|
||||
|
||||
class LBRYBaseIE(InfoExtractor):
|
||||
_BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
|
||||
_BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
|
||||
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||
_OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
|
||||
_OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
|
||||
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params, resource):
|
||||
@@ -41,7 +43,9 @@ def _resolve_url(self, url, display_id, resource):
|
||||
'resolve', display_id, {'urls': url}, resource)[url]
|
||||
|
||||
def _permanent_url(self, url, claim_name, claim_id):
|
||||
return urljoin(url, '/%s:%s' % (claim_name, claim_id))
|
||||
return urljoin(
|
||||
url.replace('lbry://', 'https://lbry.tv/'),
|
||||
'/%s:%s' % (claim_name, claim_id))
|
||||
|
||||
def _parse_stream(self, stream, url):
|
||||
stream_value = stream.get('value') or {}
|
||||
@@ -60,6 +64,7 @@ def _parse_stream(self, stream, url):
|
||||
'description': stream_value.get('description'),
|
||||
'license': stream_value.get('license'),
|
||||
'timestamp': int_or_none(stream.get('timestamp')),
|
||||
'release_timestamp': int_or_none(stream_value.get('release_time')),
|
||||
'tags': stream_value.get('tags'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||
@@ -92,6 +97,8 @@ class LBRYIE(LBRYBaseIE):
|
||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||
'timestamp': 1595694354,
|
||||
'upload_date': '20200725',
|
||||
'release_timestamp': 1595340697,
|
||||
'release_date': '20200721',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}
|
||||
@@ -106,6 +113,8 @@ class LBRYIE(LBRYBaseIE):
|
||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||
'timestamp': 1591312601,
|
||||
'upload_date': '20200604',
|
||||
'release_timestamp': 1591312421,
|
||||
'release_date': '20200604',
|
||||
'tags': list,
|
||||
'duration': 2570,
|
||||
'channel': 'The LBRY Foundation',
|
||||
@@ -137,6 +146,9 @@ class LBRYIE(LBRYBaseIE):
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'lbry://@lbry#3f/odysee#7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -166,7 +178,7 @@ def _real_extract(self, url):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
'info_dict': {
|
||||
@@ -178,20 +190,24 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@LBRYFoundation',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'lbry://@lbry#3f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, claim_id, url, page):
|
||||
def _fetch_page(self, claim_id, url, params, page):
|
||||
page += 1
|
||||
page_params = {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
}
|
||||
page_params.update(params)
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', claim_id, {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
||||
}, 'page %d' % page)
|
||||
'claim_search', claim_id, page_params, 'page %d' % page)
|
||||
for item in (result.get('items') or []):
|
||||
stream_claim_name = item.get('name')
|
||||
stream_claim_id = item.get('claim_id')
|
||||
@@ -212,8 +228,31 @@ def _real_extract(self, url):
|
||||
result = self._resolve_url(
|
||||
'lbry://' + display_id, display_id, 'channel')
|
||||
claim_id = result['claim_id']
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
content = qs.get('content', [None])[0]
|
||||
params = {
|
||||
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
|
||||
'order_by': {
|
||||
'new': ['release_time'],
|
||||
'top': ['effective_amount'],
|
||||
'trending': ['trending_group', 'trending_mixed'],
|
||||
}[qs.get('order', ['new'])[0]],
|
||||
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
|
||||
}
|
||||
duration = qs.get('duration', [None])[0]
|
||||
if duration:
|
||||
params['duration'] = {
|
||||
'long': '>=1200',
|
||||
'short': '<=240',
|
||||
}[duration]
|
||||
language = qs.get('language', ['all'])[0]
|
||||
if language != 'all':
|
||||
languages = [language]
|
||||
if language == 'en':
|
||||
languages.append('none')
|
||||
params['any_languages'] = languages
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, claim_id, url),
|
||||
functools.partial(self._fetch_page, claim_id, url, params),
|
||||
self._PAGE_SIZE)
|
||||
result_value = result.get('value') or {}
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'What Is Data Science',
|
||||
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
||||
'timestamp': 1607387907,
|
||||
'upload_date': '20201208',
|
||||
'timestamp': int, # The timestamp and upload date changes
|
||||
'upload_date': r're:\d+',
|
||||
'duration': 304,
|
||||
},
|
||||
'params': {
|
||||
@@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 41,
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
}, {
|
||||
'url': 'https://linuxacademy.com/cp/modules/view/id/39',
|
||||
'info_dict': {
|
||||
'id': '39',
|
||||
'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
|
||||
'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
|
||||
'duration': 89280,
|
||||
},
|
||||
'playlist_count': 73,
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
}]
|
||||
|
||||
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
|
||||
@@ -102,7 +112,7 @@ def random_string():
|
||||
'client_id': self._CLIENT_ID,
|
||||
'redirect_uri': self._ORIGIN_URL,
|
||||
'tenant': 'lacausers',
|
||||
'connection': 'Username-Password-Authentication',
|
||||
'connection': 'Username-Password-ACG-Proxy',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'sso': 'true',
|
||||
@@ -162,7 +172,7 @@ def _real_extract(self, url):
|
||||
if course_id:
|
||||
module = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
|
||||
r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
|
||||
item_id)
|
||||
entries = []
|
||||
chapter_number = None
|
||||
|
||||
@@ -1,15 +1,91 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nhl import NHLBaseIE
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MLBIE(NHLBaseIE):
|
||||
class MLBBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
video = self._download_video_data(display_id)
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
feed = self._get_feed(video)
|
||||
|
||||
formats = []
|
||||
for playback in (feed.get('playbacks') or []):
|
||||
playback_url = playback.get('url')
|
||||
if not playback_url:
|
||||
continue
|
||||
name = playback.get('name')
|
||||
ext = determine_ext(playback_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playback_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id=name, fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'format_id': name,
|
||||
'url': playback_url,
|
||||
}
|
||||
mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(3)),
|
||||
'tbr': int(mobj.group(1)),
|
||||
'width': int(mobj.group(2)),
|
||||
})
|
||||
mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'fps': int(mobj.group(3)),
|
||||
'height': int(mobj.group(2)),
|
||||
'tbr': int(mobj.group(4)),
|
||||
'width': int(mobj.group(1)),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
|
||||
src = cut.get('src')
|
||||
if not src:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'height': int_or_none(cut.get('height')),
|
||||
'url': src,
|
||||
'width': int_or_none(cut.get('width')),
|
||||
})
|
||||
|
||||
language = (video.get('language') or 'EN').lower()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': video.get('description'),
|
||||
'duration': parse_duration(feed.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
|
||||
'subtitles': self._extract_mlb_subtitles(feed, language),
|
||||
}
|
||||
|
||||
|
||||
class MLBIE(MLBBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
|
||||
(?:[\da-z_-]+\.)*mlb\.com/
|
||||
(?:
|
||||
(?:
|
||||
(?:[^/]+/)*c-|
|
||||
(?:[^/]+/)*video/[^/]+/c-|
|
||||
(?:
|
||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||
@@ -18,7 +94,6 @@ class MLBIE(NHLBaseIE):
|
||||
(?P<id>\d+)
|
||||
)
|
||||
'''
|
||||
_CONTENT_DOMAIN = 'content.mlb.com'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
||||
@@ -76,18 +151,6 @@ class MLBIE(NHLBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
|
||||
'md5': 'e09e37b552351fddbf4d9e699c924d68',
|
||||
'info_dict': {
|
||||
'id': '75609783',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must C: Pillar climbs for catch',
|
||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||
'timestamp': 1429139220,
|
||||
'upload_date': '20150415',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
|
||||
'only_matching': True,
|
||||
@@ -113,8 +176,92 @@ class MLBIE(NHLBaseIE):
|
||||
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_TIMESTAMP_KEY = 'date'
|
||||
|
||||
@staticmethod
|
||||
def _get_feed(video):
|
||||
return video
|
||||
|
||||
@staticmethod
|
||||
def _extract_mlb_subtitles(feed, language):
|
||||
subtitles = {}
|
||||
for keyword in (feed.get('keywordsAll') or []):
|
||||
keyword_type = keyword.get('type')
|
||||
if keyword_type and keyword_type.startswith('closed_captions_location_'):
|
||||
cc_location = keyword.get('value')
|
||||
if cc_location:
|
||||
subtitles.setdefault(language, []).append({
|
||||
'url': cc_location,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _download_video_data(self, display_id):
|
||||
return self._download_json(
|
||||
'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
|
||||
display_id)
|
||||
|
||||
|
||||
class MLBVideoIE(MLBBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
|
||||
'md5': '632358dacfceec06bad823b83d21df2d',
|
||||
'info_dict': {
|
||||
'id': 'c04a8863-f569-42e6-9f87-992393657614',
|
||||
'ext': 'mp4',
|
||||
'title': "Ackley's spectacular catch",
|
||||
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
||||
'duration': 66,
|
||||
'timestamp': 1405995000,
|
||||
'upload_date': '20140722',
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
}
|
||||
_TIMESTAMP_KEY = 'timestamp'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _get_feed(video):
|
||||
return video['feeds'][0]
|
||||
|
||||
@staticmethod
|
||||
def _extract_mlb_subtitles(feed, language):
|
||||
subtitles = {}
|
||||
for cc_location in (feed.get('closedCaptions') or []):
|
||||
subtitles.setdefault(language, []).append({
|
||||
'url': cc_location,
|
||||
})
|
||||
|
||||
def _download_video_data(self, display_id):
|
||||
# https://www.mlb.com/data-service/en/videos/[SLUG]
|
||||
return self._download_json(
|
||||
'https://fastball-gateway.mlb.com/graphql',
|
||||
display_id, query={
|
||||
'query': '''{
|
||||
mediaPlayback(ids: "%s") {
|
||||
description
|
||||
feeds(types: CMS) {
|
||||
closedCaptions
|
||||
duration
|
||||
image {
|
||||
cuts {
|
||||
width
|
||||
height
|
||||
src
|
||||
}
|
||||
}
|
||||
playbacks {
|
||||
name
|
||||
url
|
||||
}
|
||||
}
|
||||
id
|
||||
timestamp
|
||||
title
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['mediaPlayback'][0]
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
@@ -176,6 +177,22 @@ def _get_video_info(self, itemdoc, use_hls=True):
|
||||
raise ExtractorError('Could not find video title')
|
||||
title = title.strip()
|
||||
|
||||
series = find_xpath_attr(
|
||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:franchise')
|
||||
season = find_xpath_attr(
|
||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:seasonN')
|
||||
episode = find_xpath_attr(
|
||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:episodeN')
|
||||
series = series.text if series is not None else None
|
||||
season = season.text if season is not None else None
|
||||
episode = episode.text if episode is not None else None
|
||||
if season and episode:
|
||||
# episode number includes season, so remove it
|
||||
episode = re.sub(r'^%s' % season, '', episode)
|
||||
|
||||
# This a short id that's used in the webpage urls
|
||||
mtvn_id = None
|
||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
@@ -201,6 +218,9 @@ def _get_video_info(self, itemdoc, use_hls=True):
|
||||
'description': description,
|
||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||
'timestamp': timestamp,
|
||||
'series': series,
|
||||
'season_number': int_or_none(season),
|
||||
'episode_number': int_or_none(episode),
|
||||
}
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
@@ -320,7 +340,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
@@ -483,3 +503,152 @@ def _get_feed_query(self, uri):
|
||||
'arcEp': 'mtv.de',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
|
||||
class MTVItaliaIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv.it'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
|
||||
'info_dict': {
|
||||
'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cavoli amario (episodio completo)',
|
||||
'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
|
||||
'series': 'Mario - Una Serie Di Maccio Capatonda',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IT']
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'arcEp': 'mtv.it',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
|
||||
class MTVItaliaProgrammaIE(MTVItaliaIE):
|
||||
IE_NAME = 'mtv.it:programma'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
# program page: general
|
||||
'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
|
||||
'info_dict': {
|
||||
'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
|
||||
'title': 'Mario - Una Serie Di Maccio Capatonda',
|
||||
'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# program page: specific season
|
||||
'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
|
||||
'info_dict': {
|
||||
'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
|
||||
'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
|
||||
},
|
||||
'playlist_count': 34,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# playlist page + redirect
|
||||
'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
|
||||
'info_dict': {
|
||||
'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
|
||||
'title': 'Sexy Videos',
|
||||
},
|
||||
'playlist_mincount': 145,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IT']
|
||||
_FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'
|
||||
|
||||
def _get_entries(self, title, url):
|
||||
while True:
|
||||
pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
|
||||
entries = self._download_json(url, title, 'page %s' % pg)
|
||||
url = try_get(
|
||||
entries, lambda x: x['result']['nextPageURL'], compat_str)
|
||||
entries = try_get(
|
||||
entries, (
|
||||
lambda x: x['result']['data']['items'],
|
||||
lambda x: x['result']['data']['seasons']),
|
||||
list)
|
||||
for entry in entries or []:
|
||||
if entry.get('canonicalURL'):
|
||||
yield self.url_result(entry['canonicalURL'])
|
||||
if not url:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = {'url': url}
|
||||
info_url = update_url_query(self._FEED_URL, query)
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(info_url, video_id).get('manifest')
|
||||
|
||||
redirect = try_get(
|
||||
info, lambda x: x['newLocation']['url'], compat_str)
|
||||
if redirect:
|
||||
return self.url_result(redirect)
|
||||
|
||||
title = info.get('title')
|
||||
video_id = try_get(
|
||||
info, lambda x: x['reporting']['itemId'], compat_str)
|
||||
parent_id = try_get(
|
||||
info, lambda x: x['reporting']['parentId'], compat_str)
|
||||
|
||||
playlist_url = current_url = None
|
||||
for z in (info.get('zones') or {}).values():
|
||||
if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
|
||||
info_url = z.get('feed')
|
||||
if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
|
||||
playlist_url = playlist_url or z.get('feed')
|
||||
if z.get('moduleName') in ('INTL_M300',):
|
||||
current_url = current_url or z.get('feed')
|
||||
|
||||
if not info_url:
|
||||
raise ExtractorError('No info found')
|
||||
|
||||
if video_id == parent_id:
|
||||
video_id = self._search_regex(
|
||||
r'([^\/]+)/[^\/]+$', info_url, 'video_id')
|
||||
|
||||
info = self._download_json(info_url, video_id, 'Show infos')
|
||||
info = try_get(info, lambda x: x['result']['data'], dict)
|
||||
title = title or try_get(
|
||||
info, (
|
||||
lambda x: x['title'],
|
||||
lambda x: x['headline']),
|
||||
compat_str)
|
||||
description = try_get(info, lambda x: x['content'], compat_str)
|
||||
|
||||
if current_url:
|
||||
season = try_get(
|
||||
self._download_json(playlist_url, video_id, 'Seasons info'),
|
||||
lambda x: x['result']['data'], dict)
|
||||
current = try_get(
|
||||
season, lambda x: x['currentSeason'], compat_str)
|
||||
seasons = try_get(
|
||||
season, lambda x: x['seasons'], list) or []
|
||||
|
||||
if current in [s.get('eTitle') for s in seasons]:
|
||||
playlist_url = current_url
|
||||
|
||||
title = re.sub(
|
||||
r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
|
||||
'', title, flags=re.IGNORECASE).strip()
|
||||
|
||||
return self.playlist_result(
|
||||
self._get_entries(title, playlist_url),
|
||||
video_id, title, description)
|
||||
|
||||
@@ -6,98 +6,122 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
qualities,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
VALID_STREAMS = ('dash', )
|
||||
|
||||
|
||||
class MxplayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mxplayer\.in/movie/(?P<slug>[a-z0-9]+(?:-[a-z0-9]+)*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?:show|movie)/(?:(?P<display_id>[-/a-z0-9]+)-)?(?P<id>[a-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
|
||||
'info_dict': {
|
||||
'id': 'b9fa28df3bfb8758874735bbd7d2655a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Knock Knock Movie | Watch 2015 Knock Knock Full Movie Online- MX Player',
|
||||
'title': 'Knock Knock (Hindi Dubbed)',
|
||||
'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo'
|
||||
}
|
||||
}
|
||||
|
||||
def _get_best_stream_url(self, stream):
|
||||
best_stream = list(filter(None, [v for k, v in stream.items()]))
|
||||
return best_stream.pop(0) if len(best_stream) else None
|
||||
}, {
|
||||
'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
|
||||
'info_dict': {
|
||||
'id': '45055d5bcff169ad48f2ad7552a83d6c',
|
||||
'ext': 'm3u8',
|
||||
'title': 'The infamous taxi gang of Meerut',
|
||||
'description': 'md5:033a0a7e3fd147be4fb7e07a01a3dc28',
|
||||
'season': 'Season 1',
|
||||
'series': 'Shaitaan'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
|
||||
'info_dict': {
|
||||
'id': 'd445579792b0135598ba1bc9088a84cb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Duh Swapna',
|
||||
'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
|
||||
'season': 'Chapter 1',
|
||||
'series': 'Aashram'
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo'
|
||||
}
|
||||
}]
|
||||
|
||||
def _get_stream_urls(self, video_dict):
|
||||
stream_dict = video_dict.get('stream', {'provider': {}})
|
||||
stream_provider = stream_dict.get('provider')
|
||||
stream_provider_dict = try_get(
|
||||
video_dict,
|
||||
lambda x: x['stream'][x['stream']['provider']])
|
||||
if not stream_provider_dict:
|
||||
raise ExtractorError('No stream provider found', expected=True)
|
||||
|
||||
if not stream_dict[stream_provider]:
|
||||
message = 'No stream provider found'
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
streams = []
|
||||
for stream_name, v in stream_dict[stream_provider].items():
|
||||
if stream_name in VALID_STREAMS:
|
||||
stream_url = self._get_best_stream_url(v)
|
||||
if stream_url is None:
|
||||
continue
|
||||
streams.append((stream_name, stream_url))
|
||||
return streams
|
||||
for stream_name, stream in stream_provider_dict.items():
|
||||
if stream_name in ('hls', 'dash', 'hlsUrl', 'dashUrl'):
|
||||
stream_type = stream_name.replace('Url', '')
|
||||
if isinstance(stream, dict):
|
||||
for quality, stream_url in stream.items():
|
||||
if stream_url:
|
||||
yield stream_type, quality, stream_url
|
||||
else:
|
||||
yield stream_type, 'base', stream
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_slug = mobj.group('slug')
|
||||
|
||||
video_id = video_slug.split('-')[-1]
|
||||
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
window_state_json = self._html_search_regex(
|
||||
r'(?s)<script>window\.state\s*[:=]\s(\{.+\})\n(\w+).*(</script>).*',
|
||||
webpage, 'WindowState')
|
||||
|
||||
source = self._parse_json(js_to_json(window_state_json), video_id)
|
||||
source = self._parse_json(
|
||||
js_to_json(self._html_search_regex(
|
||||
r'(?s)<script>window\.state\s*[:=]\s(\{.+\})\n(\w+).*(</script>).*',
|
||||
webpage, 'WindowState')),
|
||||
video_id)
|
||||
if not source:
|
||||
raise ExtractorError('Cannot find source', expected=True)
|
||||
|
||||
config_dict = source['config']
|
||||
video_dict = source['entities'][video_id]
|
||||
stream_urls = self._get_stream_urls(video_dict)
|
||||
|
||||
title = self._og_search_title(webpage, fatal=True, default=video_dict['title'])
|
||||
thumbnails = []
|
||||
for i in video_dict.get('imageInfo') or []:
|
||||
thumbnails.append({
|
||||
'url': urljoin(config_dict['imageBaseUrl'], i['url']),
|
||||
'width': i['width'],
|
||||
'height': i['height'],
|
||||
})
|
||||
|
||||
formats = []
|
||||
headers = {'Referer': url}
|
||||
for stream_name, stream_url in stream_urls:
|
||||
if stream_name == 'dash':
|
||||
format_url = url_or_none(urljoin(config_dict['videoCdnBaseUrl'], stream_url))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', headers=headers))
|
||||
get_quality = qualities(['main', 'base', 'high'])
|
||||
for stream_type, quality, stream_url in self._get_stream_urls(video_dict):
|
||||
format_url = url_or_none(urljoin(config_dict['videoCdnBaseUrl'], stream_url))
|
||||
if not format_url:
|
||||
continue
|
||||
if stream_type == 'dash':
|
||||
dash_formats = self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash-%s' % quality, headers={'Referer': url})
|
||||
for frmt in dash_formats:
|
||||
frmt['quality'] = get_quality(quality)
|
||||
formats.extend(dash_formats)
|
||||
elif stream_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, fatal=False,
|
||||
m3u8_id='hls-%s' % quality, quality=get_quality(quality)))
|
||||
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id.replace('/', '-'),
|
||||
'title': video_dict['title'] or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'description': video_dict.get('description'),
|
||||
'formats': formats
|
||||
'season': try_get(video_dict, lambda x: x['container']['title']),
|
||||
'series': try_get(video_dict, lambda x: x['container']['container']['title']),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
if video_dict.get('imageInfo'):
|
||||
info['thumbnails'] = list(map(lambda i: dict(i, **{
|
||||
'url': urljoin(config_dict['imageBaseUrl'], i['url'])
|
||||
}), video_dict['imageInfo']))
|
||||
|
||||
if video_dict.get('webUrl'):
|
||||
last_part = video_dict['webUrl'].split("/")[-1]
|
||||
info['display_id'] = last_part.replace(video_id, "").rstrip("-")
|
||||
|
||||
return info
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
@@ -18,7 +19,7 @@
|
||||
)
|
||||
|
||||
|
||||
class NBCIE(AdobePassIE):
|
||||
class NBCIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
|
||||
|
||||
_TESTS = [
|
||||
@@ -132,7 +133,9 @@ def _real_extract(self, url):
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
video_id = video_data['mpxGuid']
|
||||
title = video_data['secondaryTitle']
|
||||
tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id)
|
||||
tpm = self._download_theplatform_metadata(tp_path, video_id)
|
||||
title = tpm.get('title') or video_data.get('secondaryTitle')
|
||||
if video_data.get('locked'):
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data.get('resourceId') or 'nbcentertainment',
|
||||
@@ -142,18 +145,40 @@ def _real_extract(self, url):
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
|
||||
query), {'force_smil_url': True})
|
||||
|
||||
# Empty string or 0 can be valid values for these. So the check must be `is None`
|
||||
description = video_data.get('description')
|
||||
if description is None:
|
||||
description = tpm.get('description')
|
||||
episode_number = int_or_none(video_data.get('episodeNumber'))
|
||||
if episode_number is None:
|
||||
episode_number = int_or_none(tpm.get('nbcu$airOrder'))
|
||||
rating = video_data.get('rating')
|
||||
if rating is None:
|
||||
try_get(tpm, lambda x: x['ratings'][0]['rating'])
|
||||
season_number = int_or_none(video_data.get('seasonNumber'))
|
||||
if season_number is None:
|
||||
season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
|
||||
series = video_data.get('seriesShortTitle')
|
||||
if series is None:
|
||||
series = tpm.get('nbcu$seriesShortTitle')
|
||||
tags = video_data.get('keywords')
|
||||
if tags is None or len(tags) == 0:
|
||||
tags = tpm.get('keywords')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'description': description,
|
||||
'episode': title,
|
||||
'episode_number': episode_number,
|
||||
'id': video_id,
|
||||
'ie_key': 'ThePlatform',
|
||||
'season_number': season_number,
|
||||
'series': series,
|
||||
'tags': tags,
|
||||
'title': title,
|
||||
'url': theplatform_url,
|
||||
'description': video_data.get('description'),
|
||||
'tags': video_data.get('keywords'),
|
||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'episode': title,
|
||||
'series': video_data.get('seriesShortTitle'),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
@@ -20,6 +21,7 @@
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
PostProcessingError,
|
||||
str_or_none,
|
||||
remove_start,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
@@ -34,7 +36,7 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
@@ -203,7 +205,7 @@ def _get_heartbeat_info(self, info_dict):
|
||||
'data-api-data="([^"]+)"', webpage,
|
||||
'API data', default='{}'), video_id)
|
||||
|
||||
session_api_data = try_get(api_data, lambda x: x['video']['dmcInfo']['session_api'])
|
||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||
|
||||
# ping
|
||||
@@ -220,7 +222,7 @@ def _get_heartbeat_info(self, info_dict):
|
||||
yesno = lambda x: 'yes' if x else 'no'
|
||||
|
||||
# m3u8 (encryption)
|
||||
if 'encryption' in try_get(api_data, lambda x: x['video']['dmcInfo']) or {}:
|
||||
if 'encryption' in (try_get(api_data, lambda x: x['media']['delivery']['movie']) or {}):
|
||||
protocol = 'm3u8'
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
@@ -244,8 +246,8 @@ def _get_heartbeat_info(self, info_dict):
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -258,15 +260,15 @@ def _get_heartbeat_info(self, info_dict):
|
||||
data=json.dumps({
|
||||
'session': {
|
||||
'client_info': {
|
||||
'player_id': session_api_data.get('player_id'),
|
||||
'player_id': session_api_data.get('playerId'),
|
||||
},
|
||||
'content_auth': {
|
||||
'auth_type': try_get(session_api_data, lambda x: x['auth_types'][session_api_data['protocols'][0]]),
|
||||
'content_key_timeout': session_api_data.get('content_key_timeout'),
|
||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||
'service_id': 'nicovideo',
|
||||
'service_user_id': session_api_data.get('service_user_id')
|
||||
'service_user_id': session_api_data.get('serviceUserId')
|
||||
},
|
||||
'content_id': session_api_data.get('content_id'),
|
||||
'content_id': session_api_data.get('contentId'),
|
||||
'content_src_id_sets': [{
|
||||
'content_src_ids': [{
|
||||
'src_id_to_mux': {
|
||||
@@ -279,7 +281,7 @@ def _get_heartbeat_info(self, info_dict):
|
||||
'content_uri': '',
|
||||
'keep_method': {
|
||||
'heartbeat': {
|
||||
'lifetime': session_api_data.get('heartbeat_lifetime')
|
||||
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||
}
|
||||
},
|
||||
'priority': session_api_data.get('priority'),
|
||||
@@ -289,7 +291,7 @@ def _get_heartbeat_info(self, info_dict):
|
||||
'http_parameters': session_api_http_parameters
|
||||
}
|
||||
},
|
||||
'recipe_id': session_api_data.get('recipe_id'),
|
||||
'recipe_id': session_api_data.get('recipeId'),
|
||||
'session_operation_auth': {
|
||||
'session_operation_auth_by_signature': {
|
||||
'signature': session_api_data.get('signature'),
|
||||
@@ -308,7 +310,7 @@ def _get_heartbeat_info(self, info_dict):
|
||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||
'data': json.dumps(session_response['data']),
|
||||
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
||||
'interval': float_or_none(session_api_data.get('heartbeat_lifetime'), scale=2000),
|
||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=2000),
|
||||
}
|
||||
|
||||
return info_dict, heartbeat_info_dict
|
||||
@@ -327,15 +329,17 @@ def parse_format_id(id_code):
|
||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||
vdict = parse_format_id(video_quality['id'])
|
||||
adict = parse_format_id(audio_quality['id'])
|
||||
resolution = video_quality.get('resolution', {'height': vdict.get('res')})
|
||||
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||
|
||||
return {
|
||||
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||
'format_id': format_id,
|
||||
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||
'vcodec': vdict.get('codec'),
|
||||
'acodec': adict.get('codec'),
|
||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000) or float_or_none(vdict.get('br')),
|
||||
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||
'width': int_or_none(resolution.get('width')),
|
||||
@@ -394,92 +398,93 @@ def get_video_info_xml(items):
|
||||
formats = []
|
||||
|
||||
# Get HTML5 videos info
|
||||
try:
|
||||
dmc_info = api_data['video']['dmcInfo']
|
||||
except KeyError:
|
||||
raise ExtractorError('The video can\'t downloaded.',
|
||||
expected=True)
|
||||
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||
if not quality_info:
|
||||
raise ExtractorError('The video can\'t downloaded.', expected=True)
|
||||
|
||||
quality_info = dmc_info.get('quality')
|
||||
for audio_quality in quality_info.get('audios') or {}:
|
||||
for video_quality in quality_info.get('videos') or {}:
|
||||
if not audio_quality.get('available') or not video_quality.get('available'):
|
||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||
continue
|
||||
formats.append(self._extract_format_for_quality(
|
||||
api_data, video_id, audio_quality, video_quality))
|
||||
|
||||
# Get flv/swf info
|
||||
timestamp = None
|
||||
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||
is_economy = video_real_url.endswith('low')
|
||||
if not video_real_url:
|
||||
self.report_warning('Unable to obtain smile video information')
|
||||
else:
|
||||
is_economy = video_real_url.endswith('low')
|
||||
|
||||
if is_economy:
|
||||
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||
if is_economy:
|
||||
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||
|
||||
# Invoking ffprobe to determine resolution
|
||||
pp = FFmpegPostProcessor(self._downloader)
|
||||
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||
# Invoking ffprobe to determine resolution
|
||||
pp = FFmpegPostProcessor(self._downloader)
|
||||
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||
|
||||
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||
|
||||
try:
|
||||
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||
except PostProcessingError as err:
|
||||
raise ExtractorError(err.msg, expected=True)
|
||||
try:
|
||||
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||
except PostProcessingError as err:
|
||||
raise ExtractorError(err.msg, expected=True)
|
||||
|
||||
v_stream = a_stream = {}
|
||||
v_stream = a_stream = {}
|
||||
|
||||
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
||||
for stream in metadata['streams']:
|
||||
if stream['codec_type'] == 'video':
|
||||
v_stream = stream
|
||||
elif stream['codec_type'] == 'audio':
|
||||
a_stream = stream
|
||||
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
||||
for stream in metadata['streams']:
|
||||
if stream['codec_type'] == 'video':
|
||||
v_stream = stream
|
||||
elif stream['codec_type'] == 'audio':
|
||||
a_stream = stream
|
||||
|
||||
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||
filesize = int(
|
||||
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||
or metadata['format']['size']
|
||||
)
|
||||
extension = (
|
||||
get_video_info_xml('movie_type')
|
||||
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||
)
|
||||
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||
filesize = int(
|
||||
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||
or metadata['format']['size']
|
||||
)
|
||||
extension = (
|
||||
get_video_info_xml('movie_type')
|
||||
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||
)
|
||||
|
||||
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||
timestamp = (
|
||||
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||
)
|
||||
metadata_timestamp = (
|
||||
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||
or timestamp if extension != 'mp4' else 0
|
||||
)
|
||||
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||
timestamp = (
|
||||
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||
)
|
||||
metadata_timestamp = (
|
||||
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||
or timestamp if extension != 'mp4' else 0
|
||||
)
|
||||
|
||||
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||
|
||||
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||
|
||||
# If movie file size is unstable, old server movie is not source movie.
|
||||
if filesize > 1:
|
||||
formats.append({
|
||||
'url': video_real_url,
|
||||
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||
'ext': extension,
|
||||
'container': extension,
|
||||
'vcodec': v_stream.get('codec_name'),
|
||||
'acodec': a_stream.get('codec_name'),
|
||||
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
||||
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||
'height': int_or_none(v_stream.get('height')),
|
||||
'width': int_or_none(v_stream.get('width')),
|
||||
'source_preference': 5 if not is_economy else -2,
|
||||
'quality': 5 if is_source and not is_economy else None,
|
||||
'filesize': filesize
|
||||
})
|
||||
# If movie file size is unstable, old server movie is not source movie.
|
||||
if filesize > 1:
|
||||
formats.append({
|
||||
'url': video_real_url,
|
||||
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||
'ext': extension,
|
||||
'container': extension,
|
||||
'vcodec': v_stream.get('codec_name'),
|
||||
'acodec': a_stream.get('codec_name'),
|
||||
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
||||
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||
'height': int_or_none(v_stream.get('height')),
|
||||
'width': int_or_none(v_stream.get('width')),
|
||||
'source_preference': 5 if not is_economy else -2,
|
||||
'quality': 5 if is_source and not is_economy else None,
|
||||
'filesize': filesize
|
||||
})
|
||||
|
||||
if len(formats) == 0:
|
||||
raise ExtractorError('Unable to find video info.')
|
||||
@@ -487,13 +492,12 @@ def get_video_info_xml(items):
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Start extracting information
|
||||
title = get_video_info_web('originalTitle')
|
||||
if not title:
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
title = (
|
||||
get_video_info_web(['originalTitle', 'title'])
|
||||
or self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(
|
||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||
webpage, 'video title')
|
||||
webpage, 'video title'))
|
||||
|
||||
watch_api_data_string = self._html_search_regex(
|
||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||
@@ -517,6 +521,7 @@ def get_video_info_xml(items):
|
||||
timestamp = parse_iso8601(
|
||||
video_detail['postedAt'].replace('/', '-'),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||
|
||||
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||
if not view_count:
|
||||
@@ -525,11 +530,16 @@ def get_video_info_xml(items):
|
||||
webpage, 'view count', default=None)
|
||||
if match:
|
||||
view_count = int_or_none(match.replace(',', ''))
|
||||
view_count = view_count or video_detail.get('viewCount')
|
||||
view_count = (
|
||||
view_count
|
||||
or video_detail.get('viewCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||
|
||||
comment_count = (
|
||||
int_or_none(get_video_info_web('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||
|
||||
comment_count = (int_or_none(get_video_info_web('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
||||
if not comment_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||
@@ -559,7 +569,7 @@ def get_video_info_xml(items):
|
||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||
# in the JSON, which will cause None to be returned instead of {}.
|
||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||
uploader_id = (
|
||||
uploader_id = str_or_none(
|
||||
get_video_info_web(['ch_id', 'user_id'])
|
||||
or owner.get('id')
|
||||
or channel_id
|
||||
@@ -589,7 +599,7 @@ def get_video_info_xml(items):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
@@ -647,3 +657,40 @@ def pagefunc(pagenum):
|
||||
'uploader_id': uploader_id,
|
||||
'entries': OnDemandPagedList(pagefunc, 25),
|
||||
}
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'https://www.nicovideo.jp/user/419948',
|
||||
'info_dict': {
|
||||
'id': '419948',
|
||||
},
|
||||
'playlist_mincount': 101,
|
||||
}
|
||||
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
|
||||
_api_headers = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
'X-Niconico-Language': 'en-us'
|
||||
}
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _entries(self, list_id, ):
|
||||
total_count = 1
|
||||
count = page_num = 0
|
||||
while count < total_count:
|
||||
json_parsed = self._download_json(
|
||||
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
|
||||
headers=self._api_headers,
|
||||
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
||||
if not page_num:
|
||||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||
for entry in json_parsed["data"]["items"]:
|
||||
count += 1
|
||||
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||
|
||||
@@ -599,11 +599,13 @@ def channel_data(field, type_):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
||||
'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
|
||||
'timestamp': unified_timestamp(video.get('publishedAt')),
|
||||
'uploader': account_data('displayName', compat_str),
|
||||
'uploader_id': str_or_none(account_data('id', int)),
|
||||
@@ -621,5 +623,6 @@ def channel_data(field, type_):
|
||||
'tags': try_get(video, lambda x: x['tags'], list),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
'subtitles': subtitles,
|
||||
'webpage_url': webpage_url,
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ def _extract_video(self, data, extract_formats=True):
|
||||
|
||||
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
duration = None
|
||||
if extract_formats:
|
||||
@@ -38,8 +39,9 @@ def _extract_video(self, data, extract_formats=True):
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
if not format_url or format_url in urls:
|
||||
continue
|
||||
urls.append(format_url)
|
||||
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||
ext = determine_ext(format_url)
|
||||
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||
|
||||
164
yt_dlp/extractor/plutotv.py
Normal file
164
yt_dlp/extractor/plutotv.py
Normal file
@@ -0,0 +1,164 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PlutoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pluto\.tv/on-demand/(?P<video_type>movies|series)/(?P<slug>.*)/?$'
|
||||
_INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/'
|
||||
_INFO_QUERY_PARAMS = {
|
||||
'appName': 'web',
|
||||
'appVersion': 'na',
|
||||
'clientID': compat_str(uuid.uuid1()),
|
||||
'clientModelNumber': 'na',
|
||||
'serverSideAds': 'false',
|
||||
'deviceMake': 'unknown',
|
||||
'deviceModel': 'web',
|
||||
'deviceType': 'web',
|
||||
'deviceVersion': 'unknown',
|
||||
'sid': compat_str(uuid.uuid1()),
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3',
|
||||
'md5': 'ebcdd8ed89aaace9df37924f722fd9bd',
|
||||
'info_dict': {
|
||||
'id': '5de6c598e9379ae4912df0a8',
|
||||
'ext': 'mp4',
|
||||
'title': 'It\'s In The Cards',
|
||||
'episode': 'It\'s In The Cards',
|
||||
'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.',
|
||||
'series': 'I Love Money',
|
||||
'season_number': 2,
|
||||
'episode_number': 3,
|
||||
'duration': 3600,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/',
|
||||
'playlist_count': 11,
|
||||
'info_dict': {
|
||||
'id': '5de6c582e9379ae4912dedbd',
|
||||
'title': 'I Love Money - Season 1',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://pluto.tv/on-demand/series/i-love-money/',
|
||||
'playlist_count': 26,
|
||||
'info_dict': {
|
||||
'id': '5de6c582e9379ae4912dedbd',
|
||||
'title': 'I Love Money',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1',
|
||||
'md5': '3cead001d317a018bf856a896dee1762',
|
||||
'info_dict': {
|
||||
'id': '5e83ac701fa6a9001bb9df24',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arrival',
|
||||
'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.',
|
||||
'duration': 9000,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _to_ad_free_formats(self, video_id, formats):
|
||||
ad_free_formats = []
|
||||
m3u8_urls = set()
|
||||
for format in formats:
|
||||
res = self._download_webpage(
|
||||
format.get('url'), video_id, note='Downloading m3u8 playlist',
|
||||
fatal=False)
|
||||
if not res:
|
||||
continue
|
||||
first_segment_url = re.search(
|
||||
r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res,
|
||||
re.MULTILINE)
|
||||
if not first_segment_url:
|
||||
continue
|
||||
m3u8_urls.add(
|
||||
compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8'))
|
||||
|
||||
for m3u8_url in m3u8_urls:
|
||||
ad_free_formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(ad_free_formats)
|
||||
return ad_free_formats
|
||||
|
||||
def _get_video_info(self, video_json, slug, series_name=None):
|
||||
video_id = video_json.get('_id', slug)
|
||||
formats = []
|
||||
for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []:
|
||||
if video_url.get('type') != 'hls':
|
||||
continue
|
||||
url = url_or_none(video_url.get('url'))
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
info = {
|
||||
'id': video_id,
|
||||
'formats': self._to_ad_free_formats(video_id, formats),
|
||||
'title': video_json.get('name'),
|
||||
'description': video_json.get('description'),
|
||||
'duration': float_or_none(video_json.get('duration'), scale=1000),
|
||||
}
|
||||
if series_name:
|
||||
info.update({
|
||||
'series': series_name,
|
||||
'episode': video_json.get('name'),
|
||||
'season_number': int_or_none(video_json.get('season')),
|
||||
'episode_number': int_or_none(video_json.get('number')),
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
path = compat_urlparse.urlparse(url).path
|
||||
path_components = path.split('/')
|
||||
video_type = path_components[2]
|
||||
info_slug = path_components[3]
|
||||
video_json = self._download_json(self._INFO_URL + info_slug, info_slug,
|
||||
query=self._INFO_QUERY_PARAMS)
|
||||
|
||||
if video_type == 'series':
|
||||
series_name = video_json.get('name', info_slug)
|
||||
season_number = int_or_none(try_get(path_components, lambda x: x[5]))
|
||||
episode_slug = try_get(path_components, lambda x: x[7])
|
||||
|
||||
videos = []
|
||||
for season in video_json['seasons']:
|
||||
if season_number is not None and season_number != int_or_none(season.get('number')):
|
||||
continue
|
||||
for episode in season['episodes']:
|
||||
if episode_slug is not None and episode_slug != episode.get('slug'):
|
||||
continue
|
||||
videos.append(self._get_video_info(episode, episode_slug, series_name))
|
||||
if not videos:
|
||||
raise ExtractorError('Failed to find any videos to extract')
|
||||
if episode_slug is not None and len(videos) == 1:
|
||||
return videos[0]
|
||||
playlist_title = series_name
|
||||
if season_number is not None:
|
||||
playlist_title += ' - Season %d' % season_number
|
||||
return self.playlist_result(videos,
|
||||
playlist_id=video_json.get('_id', info_slug),
|
||||
playlist_title=playlist_title)
|
||||
return self._get_video_info(video_json, info_slug)
|
||||
@@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
@@ -265,7 +266,8 @@ def dl_webpage(platform):
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
@@ -394,6 +396,21 @@ def parse_quality_items(quality_items):
|
||||
|
||||
upload_date = None
|
||||
formats = []
|
||||
|
||||
def add_format(format_url, height=None):
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
for video_url, height in video_urls:
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
@@ -410,18 +427,19 @@ def parse_quality_items(quality_items):
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
if '/video/get_media' in video_url:
|
||||
medias = self._download_json(video_url, video_id, fatal=False)
|
||||
if isinstance(medias, list):
|
||||
for media in medias:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
video_url = url_or_none(media.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
height = int_or_none(media.get('quality'))
|
||||
add_format(video_url, height)
|
||||
continue
|
||||
add_format(video_url)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
|
||||
@@ -158,6 +158,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||
# subtitles at 'subtitlesArray' key (see #27698)
|
||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DRM protected
|
||||
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -166,6 +170,14 @@ def _real_extract(self, url):
|
||||
media = self._download_json(
|
||||
base + '.json', video_id, 'Downloading video JSON')
|
||||
|
||||
if not self._downloader.params.get('allow_unplayable_formats'):
|
||||
if try_get(
|
||||
media,
|
||||
(lambda x: x['rights_management']['rights']['drm'],
|
||||
lambda x: x['program_info']['rights_management']['rights']['drm']),
|
||||
dict):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
title = media['name']
|
||||
video = media['video']
|
||||
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
|
||||
|
||||
class RCSBaseIE(InfoExtractor):
|
||||
# based on VideoPlayerLoader.prototype.getVideoSrc
|
||||
# and VideoPlayerLoader.prototype.transformSrc from
|
||||
# https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs
|
||||
_ALL_REPLACE = {
|
||||
'media2vam.corriere.it.edgesuite.net':
|
||||
'media2vam-corriere-it.akamaized.net',
|
||||
@@ -191,10 +194,10 @@ def _create_formats(self, urls, video_id):
|
||||
urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
|
||||
if not formats:
|
||||
if urls.get('mp4'):
|
||||
formats.append({
|
||||
'format_id': 'http-mp4',
|
||||
'url': urls.get('mp4')
|
||||
'url': urls['mp4']
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
@@ -216,10 +219,12 @@ def _real_extract(self, url):
|
||||
video_data = None
|
||||
# look for json video data url
|
||||
json = self._search_regex(
|
||||
r'''(?x)var url\s*=\s*["']((?:https?:)?
|
||||
//video\.rcs\.it
|
||||
/fragment-includes/video-includes/.+?\.json)["'];''',
|
||||
page, video_id, default=None)
|
||||
r'''(?x)url\s*=\s*(["'])
|
||||
(?P<url>
|
||||
(?:https?:)?//video\.rcs\.it
|
||||
/fragment-includes/video-includes/.+?\.json
|
||||
)\1;''',
|
||||
page, video_id, group='url', default=None)
|
||||
if json:
|
||||
if json.startswith('//'):
|
||||
json = 'https:%s' % json
|
||||
@@ -227,13 +232,16 @@ def _real_extract(self, url):
|
||||
|
||||
# if json url not found, look for json video data directly in the page
|
||||
else:
|
||||
# RCS normal pages and most of the embeds
|
||||
json = self._search_regex(
|
||||
r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
|
||||
page, video_id, default=None)
|
||||
if json:
|
||||
video_data = self._parse_json(
|
||||
json, video_id, transform_source=js_to_json)
|
||||
else:
|
||||
if not json and 'video-embed' in url:
|
||||
page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id)
|
||||
json = self._search_regex(
|
||||
r'##start-video##({[\s\S]+?})##end-video##',
|
||||
page, video_id, default=None)
|
||||
if not json:
|
||||
# if no video data found try search for iframes
|
||||
emb = RCSEmbedsIE._extract_url(page)
|
||||
if emb:
|
||||
@@ -242,6 +250,9 @@ def _real_extract(self, url):
|
||||
'url': emb,
|
||||
'ie_key': RCSEmbedsIE.ie_key()
|
||||
}
|
||||
if json:
|
||||
video_data = self._parse_json(
|
||||
json, video_id, transform_source=js_to_json)
|
||||
|
||||
if not video_data:
|
||||
raise ExtractorError('Video data not found in the page')
|
||||
@@ -250,7 +261,8 @@ def _real_extract(self, url):
|
||||
self._get_video_src(video_data), video_id)
|
||||
|
||||
description = (video_data.get('description')
|
||||
or clean_html(video_data.get('htmlDescription')))
|
||||
or clean_html(video_data.get('htmlDescription'))
|
||||
or self._html_search_meta('description', page))
|
||||
uploader = video_data.get('provider') or mobj.group('cdn')
|
||||
|
||||
return {
|
||||
@@ -283,6 +295,7 @@ class RCSEmbedsIE(RCSBaseIE):
|
||||
'uploader': 'rcs.it',
|
||||
}
|
||||
}, {
|
||||
# redownload the page changing 'video-embed' in 'video-json'
|
||||
'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
|
||||
'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
|
||||
'info_dict': {
|
||||
@@ -359,6 +372,7 @@ class RCSIE(RCSBaseIE):
|
||||
'uploader': 'Corriere Tv',
|
||||
}
|
||||
}, {
|
||||
# video data inside iframe
|
||||
'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
|
||||
'md5': 'da378e4918d2afbf7d61c35abb948d4c',
|
||||
'info_dict': {
|
||||
@@ -389,15 +403,15 @@ class RCSVariousIE(RCSBaseIE):
|
||||
(?P<cdn>
|
||||
leitv\.it|
|
||||
youreporter\.it
|
||||
)/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
|
||||
)/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
|
||||
'md5': '618aaabac32152199c1af86784d4d554',
|
||||
'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/',
|
||||
'md5': '92b4e63667b8f95acb0a04da25ae28a1',
|
||||
'info_dict': {
|
||||
'id': 'marmellata-di-ciliegie-fatta-in-casa',
|
||||
'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa',
|
||||
'ext': 'mp4',
|
||||
'title': 'Marmellata di ciliegie fatta in casa',
|
||||
'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
|
||||
'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto',
|
||||
'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5',
|
||||
'uploader': 'leitv.it',
|
||||
}
|
||||
}, {
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import re
|
||||
import time
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -14,56 +15,13 @@
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = compat_b64decode(png)
|
||||
text_index = encrypted_data.find(b'tEXt')
|
||||
text_chunk = encrypted_data[text_index - 4:]
|
||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
||||
data = bytearray(text_chunk[8:8 + length])
|
||||
data = [chr(b) for b in data if b != 0]
|
||||
hash_index = data.index('#')
|
||||
alphabet_data = data[:hash_index]
|
||||
url_data = data[hash_index + 1:]
|
||||
if url_data[0] == 'H' and url_data[3] == '%':
|
||||
# remove useless HQ%% at the start
|
||||
url_data = url_data[4:]
|
||||
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in alphabet_data:
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in url_data:
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
return url
|
||||
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
@@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||
'duration': 5024.566,
|
||||
'series': 'Balonmano',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'note': 'Live stream',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||
'info_dict': {
|
||||
'id': '1694255',
|
||||
'ext': 'flv',
|
||||
'title': 'TODO',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
},
|
||||
'skip': 'The f4m manifest can\'t be used yet',
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
||||
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104 ',
|
||||
'title': 'Servir y proteger - Capítulo 104',
|
||||
'duration': 3222.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
@@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||
manager_info = self._download_json(
|
||||
self._manager = self._download_json(
|
||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||
None, 'Fetching manager info')
|
||||
self._manager = manager_info['manager']
|
||||
None, 'Fetching manager info')['manager']
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
|
||||
while True:
|
||||
length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
|
||||
chunk_type = encrypted_data.read(4)
|
||||
if chunk_type == b'IEND':
|
||||
break
|
||||
data = encrypted_data.read(length)
|
||||
if chunk_type == b'tEXt':
|
||||
alphabet_data, text = data.split(b'\0')
|
||||
quality, url_data = text.split(b'%%')
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in _bytes_to_chr(alphabet_data):
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in _bytes_to_chr(url_data):
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
yield quality.decode(), url
|
||||
encrypted_data.read(4) # CRC
|
||||
|
||||
def _extract_png_formats(self, video_id):
|
||||
png = self._download_webpage(
|
||||
'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
|
||||
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||
formats = []
|
||||
for quality, video_url in self._decrypt_url(png):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'quality': q(quality),
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title']
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png_request = sanitized_Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
formats = []
|
||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
||||
if '?' not in video_url:
|
||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
title = info['title'].strip()
|
||||
formats = self._extract_png_formats(video_id)
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
||||
sbt_file = info.get('sbtFile')
|
||||
if sbt_file:
|
||||
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||
|
||||
is_live = info.get('live') is True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'page_url': url,
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'is_live': is_live,
|
||||
'series': info.get('programTitle'),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, sub_file):
|
||||
@@ -174,48 +179,26 @@ def _get_subtitles(self, video_id, sub_file):
|
||||
for s in subs)
|
||||
|
||||
|
||||
class RTVEInfantilIE(InfoExtractor):
|
||||
class RTVEInfantilIE(RTVEALaCartaIE):
|
||||
IE_NAME = 'rtve.es:infantil'
|
||||
IE_DESC = 'RTVE infantil'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||
'md5': '915319587b33720b8e0357caaa6617e6',
|
||||
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
|
||||
'info_dict': {
|
||||
'id': '3040283',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maneras de vivir',
|
||||
'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
|
||||
'thumbnail': r're:https?://.+/1426182947956\.JPG',
|
||||
'duration': 357.958,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vidplayer_id = self._search_regex(
|
||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info.get('image'),
|
||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
||||
}
|
||||
|
||||
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
class RTVELiveIE(RTVEALaCartaIE):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
@@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
@@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
start_time = time.gmtime()
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
(r'playerId=player([0-9]+)',
|
||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||
r'data-id=["\'](\d+)'),
|
||||
webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
m3u8_url = _decrypt_url(png)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'title': self._live_title(title),
|
||||
'formats': self._extract_png_formats(vidplayer_id),
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
@@ -51,13 +51,16 @@ class ShahidIE(ShahidBaseIE):
|
||||
_NETRC_MACHINE = 'shahid'
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
|
||||
'info_dict': {
|
||||
'id': '275286',
|
||||
'id': '816924',
|
||||
'ext': 'mp4',
|
||||
'title': 'مجلس الشباب الموسم 1 كليب 1',
|
||||
'timestamp': 1506988800,
|
||||
'upload_date': '20171003',
|
||||
'title': 'متحف الدحيح الموسم 1 كليب 1',
|
||||
'timestamp': 1602806400,
|
||||
'upload_date': '20201016',
|
||||
'description': 'برومو',
|
||||
'duration': 22,
|
||||
'categories': ['كوميديا'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -109,12 +112,15 @@ def _real_extract(self, url):
|
||||
page_type = 'episode'
|
||||
|
||||
playout = self._call_api(
|
||||
'playout/url/' + video_id, video_id)['playout']
|
||||
'playout/new/url/' + video_id, video_id)['playout']
|
||||
|
||||
if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
|
||||
formats = self._extract_m3u8_formats(re.sub(
|
||||
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
|
||||
r'aws\.manifestfilter=[\w:;,-]+&?',
|
||||
'', playout['url']), video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
# video = self._call_api(
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
class SouthParkIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'southpark.cc.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
|
||||
@@ -23,8 +23,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'accountOverride': 'intl.mtvi.com',
|
||||
'arcEp': 'shared.southpark.global',
|
||||
'ep': '90877963',
|
||||
'imageEp': 'shared.southpark.global',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
|
||||
class SouthParkEsIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.cc.com:español'
|
||||
|
||||
@@ -1,82 +1,105 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SportDeutschlandIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||
'info_dict': {
|
||||
'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
|
||||
'id': '5318cac0275701382770543d7edaf0a0',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
|
||||
'categories': ['Badminton-Deutschland'],
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': int,
|
||||
'upload_date': '20200201',
|
||||
'description': 're:.*', # meaningless description for THIS video
|
||||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
|
||||
'duration': 16106.36,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||
'info_dict': {
|
||||
'id': 'c6e2fdd01f63013854c47054d2ab776f',
|
||||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
|
||||
'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
|
||||
'duration': 31397,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
sport_id = mobj.group('sport')
|
||||
|
||||
api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
||||
sport_id, video_id)
|
||||
req = sanitized_Request(api_url, headers={
|
||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
||||
'Referer': url,
|
||||
})
|
||||
data = self._download_json(req, video_id)
|
||||
|
||||
display_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
|
||||
display_id, query={'access_token': 'true'})
|
||||
asset = data['asset']
|
||||
categories = [data['section']['title']]
|
||||
|
||||
formats = []
|
||||
smil_url = asset['video']
|
||||
if '.smil' in smil_url:
|
||||
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
|
||||
|
||||
smil_doc = self._download_xml(
|
||||
smil_url, video_id, note='Downloading SMIL metadata')
|
||||
base_url_el = smil_doc.find('./head/meta')
|
||||
if base_url_el:
|
||||
base_url = base_url_el.attrib['base']
|
||||
formats.extend([{
|
||||
'format_id': 'rmtp',
|
||||
'url': base_url if base_url_el else n.attrib['src'],
|
||||
'play_path': n.attrib['src'],
|
||||
'ext': 'flv',
|
||||
'preference': -100,
|
||||
'format_note': 'Seems to fail at example stream',
|
||||
} for n in smil_doc.findall('./body/video')])
|
||||
else:
|
||||
formats.append({'url': smil_url})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': asset['title'],
|
||||
'thumbnail': asset.get('image'),
|
||||
'description': asset.get('teaser'),
|
||||
'duration': asset.get('duration'),
|
||||
'categories': categories,
|
||||
'view_count': asset.get('views'),
|
||||
'rtmp_live': asset.get('live'),
|
||||
'timestamp': parse_iso8601(asset.get('date')),
|
||||
title = (asset.get('title') or asset['label']).strip()
|
||||
asset_id = asset.get('id') or asset.get('uuid')
|
||||
info = {
|
||||
'id': asset_id,
|
||||
'title': title,
|
||||
'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
|
||||
'duration': int_or_none(asset.get('seconds')),
|
||||
}
|
||||
videos = asset.get('videos') or []
|
||||
if len(videos) > 1:
|
||||
playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
|
||||
if playlist_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
videos = [videos[int(playlist_id)]]
|
||||
self.to_screen('Downloading just a single video because of --no-playlist')
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
|
||||
|
||||
def entries():
|
||||
for i, video in enumerate(videos, 1):
|
||||
video_id = video.get('uuid')
|
||||
video_url = video.get('url')
|
||||
if not (video_id and video_url):
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
|
||||
if not formats:
|
||||
continue
|
||||
yield {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
}
|
||||
info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries(),
|
||||
})
|
||||
else:
|
||||
formats = self._extract_m3u8_formats(
|
||||
videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
|
||||
section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'display_id': asset.get('permalink'),
|
||||
'thumbnail': try_get(asset, lambda x: x['images'][0]),
|
||||
'categories': [section_title] if section_title else None,
|
||||
'view_count': int_or_none(asset.get('views')),
|
||||
'is_live': asset.get('is_live') is True,
|
||||
'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -1,19 +1,61 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StitcherIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/(?:[^/]+/)+e(?:pisode)?/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
|
||||
class StitcherBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
resp = self._download_json(
|
||||
'https://api.prod.stitcher.com/' + path,
|
||||
video_id, query=query)
|
||||
error_massage = try_get(resp, lambda x: x['errors'][0]['message'])
|
||||
if error_massage:
|
||||
raise ExtractorError(error_massage, expected=True)
|
||||
return resp['data']
|
||||
|
||||
def _extract_description(self, data):
|
||||
return clean_html(data.get('html_description') or data.get('description'))
|
||||
|
||||
def _extract_audio_url(self, episode):
|
||||
return url_or_none(episode.get('audio_url') or episode.get('guid'))
|
||||
|
||||
def _extract_show_info(self, show):
|
||||
return {
|
||||
'thumbnail': show.get('image_base_url'),
|
||||
'series': show.get('title'),
|
||||
}
|
||||
|
||||
def _extract_episode(self, episode, audio_url, show_info):
|
||||
info = {
|
||||
'id': compat_str(episode['id']),
|
||||
'display_id': episode.get('slug'),
|
||||
'title': episode['title'].strip(),
|
||||
'description': self._extract_description(episode),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'url': clean_podcast_url(audio_url),
|
||||
'vcodec': 'none',
|
||||
'timestamp': int_or_none(episode.get('date_published')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
}
|
||||
info.update(show_info)
|
||||
return info
|
||||
|
||||
|
||||
class StitcherIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
|
||||
'md5': 'e9635098e0da10b21a0e2b85585530f6',
|
||||
@@ -24,8 +66,9 @@ class StitcherIE(InfoExtractor):
|
||||
'description': 'md5:547adb4081864be114ae3831b4c2b42f',
|
||||
'duration': 1604,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20180126',
|
||||
'timestamp': 1516989316,
|
||||
'upload_date': '20151008',
|
||||
'timestamp': 1444285800,
|
||||
'series': 'Talking Machines',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
|
||||
@@ -55,33 +98,47 @@ class StitcherIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, audio_id = re.match(self._VALID_URL, url).groups()
|
||||
audio_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'shows/episodes', audio_id, {'episode_ids': audio_id})
|
||||
episode = data['episodes'][0]
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
self.raise_login_required()
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
return self._extract_episode(
|
||||
episode, audio_url, self._extract_show_info(show))
|
||||
|
||||
resp = self._download_json(
|
||||
'https://api.prod.stitcher.com/episode/' + audio_id,
|
||||
display_id or audio_id)
|
||||
episode = try_get(resp, lambda x: x['data']['episodes'][0], dict)
|
||||
if not episode:
|
||||
raise ExtractorError(resp['errors'][0]['message'], expected=True)
|
||||
|
||||
title = episode['title'].strip()
|
||||
audio_url = episode['audio_url']
|
||||
class StitcherShowIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines',
|
||||
'info_dict': {
|
||||
'id': 'the-talking-machines',
|
||||
'title': 'Talking Machines',
|
||||
'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
|
||||
},
|
||||
'playlist_mincount': 106,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/the-talking-machines',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
thumbnail = None
|
||||
show_id = episode.get('show_id')
|
||||
if show_id and episode.get('classic_id') != -1:
|
||||
thumbnail = 'https://stitcher-classic.imgix.net/feedimages/%s.jpg' % show_id
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
show_info = self._extract_show_info(show)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': clean_html(episode.get('html_description') or episode.get('description')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'thumbnail': thumbnail,
|
||||
'url': audio_url,
|
||||
'vcodec': 'none',
|
||||
'timestamp': int_or_none(episode.get('date_created')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
}
|
||||
entries = []
|
||||
for episode in (data.get('episodes') or []):
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
continue
|
||||
entries.append(self._extract_episode(episode, audio_url, show_info))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_slug, show.get('title'),
|
||||
self._extract_description(show))
|
||||
|
||||
@@ -146,18 +146,19 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
)
|
||||
(?P<svt_id>[^/?#&]+)|
|
||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||
(?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))?
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
|
||||
'url': 'https://www.svtplay.se/video/30479064',
|
||||
'md5': '2382036fd6f8c994856c323fe51c426e',
|
||||
'info_dict': {
|
||||
'id': 'jNwpV9P',
|
||||
'id': '8zVbDPA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Det här är himlen',
|
||||
'timestamp': 1586044800,
|
||||
'upload_date': '20200405',
|
||||
'duration': 3515,
|
||||
'title': 'Designdrömmar i Stenungsund',
|
||||
'timestamp': 1615770000,
|
||||
'upload_date': '20210315',
|
||||
'duration': 3519,
|
||||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||
'age_limit': 0,
|
||||
'subtitles': {
|
||||
@@ -173,6 +174,9 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted to Sweden
|
||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||
@@ -219,7 +223,8 @@ def _extract_by_video_id(self, video_id, webpage=None):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, svt_id = mobj.group('id', 'svt_id')
|
||||
video_id = mobj.group('id')
|
||||
svt_id = mobj.group('svt_id') or mobj.group('modal_id')
|
||||
|
||||
if svt_id:
|
||||
return self._extract_by_video_id(svt_id)
|
||||
@@ -254,6 +259,7 @@ def _real_extract(self, url):
|
||||
if not svt_id:
|
||||
svt_id = self._search_regex(
|
||||
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id),
|
||||
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
|
||||
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
||||
|
||||
@@ -143,7 +143,10 @@ def _real_extract(self, url):
|
||||
props_data = try_get(json_data, lambda x: x['props'], expected_type=dict)
|
||||
|
||||
# Chech statusCode for success
|
||||
if props_data.get('pageProps').get('statusCode') == 0:
|
||||
status = props_data.get('pageProps').get('statusCode')
|
||||
if status == 0:
|
||||
return self._extract_aweme(props_data, webpage, url)
|
||||
elif status == 10216:
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
|
||||
raise ExtractorError('Video not available', video_id=video_id)
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
class TrovoBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
|
||||
_HEADERS = {'Origin': 'https://trovo.live'}
|
||||
|
||||
def _extract_streamer_info(self, data):
|
||||
streamer_info = data.get('streamerInfo') or {}
|
||||
@@ -68,6 +69,7 @@ def _real_extract(self, url):
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id[:-1]) if format_id else None,
|
||||
'url': play_url,
|
||||
'http_headers': self._HEADERS,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -153,6 +155,7 @@ def _real_extract(self, url):
|
||||
'protocol': 'm3u8_native',
|
||||
'tbr': int_or_none(play_info.get('bitrate')),
|
||||
'url': play_url,
|
||||
'http_headers': self._HEADERS,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
int_or_none,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
@@ -25,6 +26,10 @@ class TVerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tver.jp/episode/79622438',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subtitle = ' '
|
||||
'url': 'https://tver.jp/corner/f0068870',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_TOKEN = None
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
@@ -47,8 +52,12 @@ def _real_extract(self, url):
|
||||
}
|
||||
|
||||
if service == 'cx':
|
||||
title = main['title']
|
||||
subtitle = strip_or_none(main.get('subtitle'))
|
||||
if subtitle:
|
||||
title += ' - ' + subtitle
|
||||
info.update({
|
||||
'title': main.get('subtitle') or main['title'],
|
||||
'title': title,
|
||||
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
|
||||
'ie_key': 'FujiTVFODPlus7',
|
||||
})
|
||||
|
||||
@@ -23,6 +23,8 @@ class VGTVIE(XstreamIE):
|
||||
'fvn.no/fvntv': 'fvntv',
|
||||
'aftenposten.no/webtv': 'aptv',
|
||||
'ap.vgtv.no/webtv': 'aptv',
|
||||
'tv.aftonbladet.se': 'abtv',
|
||||
# obsolete URL schemas, kept in order to save one HTTP redirect
|
||||
'tv.aftonbladet.se/abtv': 'abtv',
|
||||
'www.aftonbladet.se/tv': 'abtv',
|
||||
}
|
||||
@@ -140,6 +142,10 @@ class VGTVIE(XstreamIE):
|
||||
'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://tv.aftonbladet.se/video/36015/vulkanutbrott-i-rymden-nu-slapper-nasa-bilderna',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -498,6 +498,24 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/album/2632481/video/79010983',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/showcase/3253534/video/119195465',
|
||||
'note': 'A video in a password protected album (showcase)',
|
||||
'info_dict': {
|
||||
'id': '119195465',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc',
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'user20132939',
|
||||
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
},
|
||||
{
|
||||
# source file returns 403: Forbidden
|
||||
'url': 'https://vimeo.com/7809605',
|
||||
@@ -564,6 +582,44 @@ def _verify_player_video_password(self, url, video_id, headers):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _try_album_password(self, url):
|
||||
album_id = self._search_regex(
|
||||
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
|
||||
if not album_id:
|
||||
return
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||
if not viewer:
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||
webpage, 'bootstrap data'), album_id)['viewer']
|
||||
jwt = viewer['jwt']
|
||||
album = self._download_json(
|
||||
'https://api.vimeo.com/albums/' + album_id,
|
||||
album_id, headers={'Authorization': 'jwt ' + jwt},
|
||||
query={'fields': 'description,name,privacy'})
|
||||
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError(
|
||||
'This album is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||
try:
|
||||
self._download_json(
|
||||
'https://vimeo.com/showcase/%s/auth' % album_id,
|
||||
album_id, 'Verifying the password', data=urlencode_postdata({
|
||||
'password': password,
|
||||
'token': viewer['xsrft'],
|
||||
}), headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
raise ExtractorError('Wrong password', expected=True)
|
||||
raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, data = unsmuggle_url(url, {})
|
||||
headers = std_headers.copy()
|
||||
@@ -591,6 +647,7 @@ def _real_extract(self, url):
|
||||
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
self._try_album_password(url)
|
||||
try:
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
@@ -934,11 +991,15 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||
}
|
||||
if hashed_pass:
|
||||
query['_hashed_pass'] = hashed_pass
|
||||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorization,
|
||||
})['data']
|
||||
try:
|
||||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorization,
|
||||
})['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
return
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
if not link:
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,14 +21,17 @@ def _real_extract(self, url):
|
||||
|
||||
setup = self._parse_json(self._search_regex(
|
||||
r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
|
||||
video_data = setup.get('video') or {}
|
||||
player_setup = setup.get('player_setup') or setup
|
||||
video_data = player_setup.get('video') or {}
|
||||
formatted_metadata = video_data.get('formatted_metadata') or {}
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_data.get('title_short'),
|
||||
'title': player_setup.get('title') or video_data.get('title_short'),
|
||||
'description': video_data.get('description_long') or video_data.get('description_short'),
|
||||
'thumbnail': video_data.get('brightcove_thumbnail')
|
||||
'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
|
||||
'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
|
||||
}
|
||||
asset = setup.get('asset') or setup.get('params') or {}
|
||||
asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
|
||||
|
||||
formats = []
|
||||
hls_url = asset.get('hls_url')
|
||||
@@ -47,6 +52,7 @@ def _real_extract(self, url):
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
info['duration'] = int_or_none(asset.get('duration'))
|
||||
return info
|
||||
|
||||
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
|
||||
@@ -84,7 +90,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
}, {
|
||||
# Volume embed, Youtube
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
|
||||
'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
|
||||
'info_dict': {
|
||||
'id': 'Gy8Md3Eky38',
|
||||
'ext': 'mp4',
|
||||
@@ -93,6 +99,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
'uploader_id': 'TheVerge',
|
||||
'upload_date': '20141021',
|
||||
'uploader': 'The Verge',
|
||||
'timestamp': 1413907200,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'similar to the previous test',
|
||||
@@ -100,13 +107,13 @@ class VoxMediaIE(InfoExtractor):
|
||||
# Volume embed, Youtube
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
'info_dict': {
|
||||
'id': 'YCjDnX-Xzhg',
|
||||
'id': '22986359b',
|
||||
'ext': 'mp4',
|
||||
'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
|
||||
'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'upload_date': '20150915',
|
||||
'uploader': 'Vox',
|
||||
'timestamp': 1442332800,
|
||||
'duration': 285,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'similar to the previous test',
|
||||
@@ -160,6 +167,9 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
|
||||
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
|
||||
'timestamp': 1402938000,
|
||||
'upload_date': '20140616',
|
||||
'duration': 4114,
|
||||
},
|
||||
'add_ie': ['VoxMediaVolume'],
|
||||
}]
|
||||
|
||||
@@ -182,17 +182,20 @@ def metadata_from_url(r_url):
|
||||
if not embed_code:
|
||||
continue
|
||||
embed_code = ds(embed_code)
|
||||
if video_type in ('video/rcs', 'video/kenc'):
|
||||
if video_type == 'video/kenc':
|
||||
kenc = self._download_json(
|
||||
'https://www.vvvvid.it/kenc', video_id, query={
|
||||
'action': 'kt',
|
||||
'conn_id': self._conn_id,
|
||||
'url': embed_code,
|
||||
}, fatal=False) or {}
|
||||
kenc_message = kenc.get('message')
|
||||
if kenc_message:
|
||||
embed_code += '?' + ds(kenc_message)
|
||||
if video_type == 'video/kenc':
|
||||
embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
|
||||
kenc = self._download_json(
|
||||
'https://www.vvvvid.it/kenc', video_id, query={
|
||||
'action': 'kt',
|
||||
'conn_id': self._conn_id,
|
||||
'url': embed_code,
|
||||
}, fatal=False) or {}
|
||||
kenc_message = kenc.get('message')
|
||||
if kenc_message:
|
||||
embed_code += '?' + ds(kenc_message)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif video_type == 'video/rcs':
|
||||
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
||||
elif video_type == 'video/youtube':
|
||||
info.update({
|
||||
|
||||
163
yt_dlp/extractor/wimtv.py
Normal file
163
yt_dlp/extractor/wimtv.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class WimTVIE(InfoExtractor):
|
||||
_player = None
|
||||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://platform.wim.tv/
|
||||
(?:
|
||||
(?:embed/)?\?
|
||||
|\#/webtv/.+?/
|
||||
)
|
||||
(?P<type>vod|live|cast)[=/]
|
||||
(?P<id>%s).*?''' % _UUID_RE
|
||||
_TESTS = [{
|
||||
# vod stream
|
||||
'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
|
||||
'md5': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
|
||||
'info_dict': {
|
||||
'id': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
|
||||
'ext': 'mp4',
|
||||
'title': 'AMA SUPERCROSS 2020 - R2 ST. LOUIS',
|
||||
'duration': 6481,
|
||||
'thumbnail': r're:https?://.+?/thumbnail/.+?/720$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'https://platform.wim.tv/embed/?live=28e22c22-49db-40f3-8c37-8cbb0ff44556&autostart=true',
|
||||
'info_dict': {
|
||||
'id': '28e22c22-49db-40f3-8c37-8cbb0ff44556',
|
||||
'ext': 'mp4',
|
||||
'title': 'Streaming MSmotorTV',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://platform.wim.tv/#/webtv/automotornews/vod/422492b6-539e-474d-9c6b-68c9d5893365',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://platform.wim.tv/#/webtv/renzoarborechannel/cast/f47e0d15-5b45-455e-bf0d-dba8ffa96365',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+src=["\'](?P<url>%s)' % WimTVIE._VALID_URL,
|
||||
webpage)]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._player:
|
||||
self._get_player_data()
|
||||
|
||||
def _get_player_data(self):
|
||||
msg_id = 'Player data'
|
||||
self._player = {}
|
||||
|
||||
datas = [{
|
||||
'url': 'https://platform.wim.tv/common/libs/player/wimtv/wim-rest.js',
|
||||
'vars': [{
|
||||
'regex': r'appAuth = "(.+?)"',
|
||||
'variable': 'app_auth',
|
||||
}]
|
||||
}, {
|
||||
'url': 'https://platform.wim.tv/common/config/endpointconfig.js',
|
||||
'vars': [{
|
||||
'regex': r'PRODUCTION_HOSTNAME_THUMB = "(.+?)"',
|
||||
'variable': 'thumb_server',
|
||||
}, {
|
||||
'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"',
|
||||
'variable': 'thumb_server_path',
|
||||
}]
|
||||
}]
|
||||
|
||||
for data in datas:
|
||||
temp = self._download_webpage(data['url'], msg_id)
|
||||
for var in data['vars']:
|
||||
val = self._search_regex(var['regex'], temp, msg_id)
|
||||
if not val:
|
||||
raise ExtractorError('%s not found' % var['variable'])
|
||||
self._player[var['variable']] = val
|
||||
|
||||
def _generate_token(self):
|
||||
json = self._download_json(
|
||||
'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation',
|
||||
headers={'Authorization': 'Basic %s' % self._player['app_auth']},
|
||||
data=urlencode_postdata({'grant_type': 'client_credentials'}))
|
||||
token = json.get('access_token')
|
||||
if not token:
|
||||
raise ExtractorError('access token not generated')
|
||||
return token
|
||||
|
||||
def _generate_thumbnail(self, thumb_id, width='720'):
|
||||
if not thumb_id or not self._player.get('thumb_server'):
|
||||
return None
|
||||
if not self._player.get('thumb_server_path'):
|
||||
self._player['thumb_server_path'] = ''
|
||||
return '%s%s/asset/thumbnail/%s/%s' % (
|
||||
self._player['thumb_server'],
|
||||
self._player['thumb_server_path'],
|
||||
thumb_id, width)
|
||||
|
||||
def _real_extract(self, url):
|
||||
urlc = re.match(self._VALID_URL, url).groupdict()
|
||||
video_id = urlc['id']
|
||||
stream_type = is_live = None
|
||||
if urlc['type'] in {'live', 'cast'}:
|
||||
stream_type = urlc['type'] + '/channel'
|
||||
is_live = True
|
||||
else:
|
||||
stream_type = 'vod'
|
||||
is_live = False
|
||||
token = self._generate_token()
|
||||
json = self._download_json(
|
||||
'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % (
|
||||
stream_type, video_id), video_id,
|
||||
headers={'Authorization': 'Bearer %s' % token,
|
||||
'Content-Type': 'application/json'},
|
||||
data=bytes('{}', 'utf-8'))
|
||||
|
||||
formats = []
|
||||
for src in json.get('srcs') or []:
|
||||
if src.get('mimeType') == 'application/x-mpegurl':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
src.get('uniqueStreamer'), video_id, 'mp4'))
|
||||
if src.get('mimeType') == 'video/flash':
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'url': src.get('uniqueStreamer'),
|
||||
'ext': determine_ext(src.get('uniqueStreamer'), 'flv'),
|
||||
'rtmp_live': is_live,
|
||||
})
|
||||
json = json.get('resource')
|
||||
thumb = self._generate_thumbnail(json.get('thumbnailId'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': json.get('title') or json.get('name'),
|
||||
'duration': parse_duration(json.get('duration')),
|
||||
'formats': formats,
|
||||
'thumbnail': thumb,
|
||||
'is_live': is_live,
|
||||
}
|
||||
@@ -25,7 +25,9 @@
|
||||
)
|
||||
from ..jsinterp import JSInterpreter
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
format_field,
|
||||
float_or_none,
|
||||
@@ -59,9 +61,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
||||
|
||||
_RESERVED_NAMES = (
|
||||
r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
|
||||
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
|
||||
r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
|
||||
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
|
||||
r'movies|results|shared|hashtag|trending|feed|feeds|'
|
||||
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
|
||||
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
@@ -271,15 +273,21 @@ def _real_initialize(self):
|
||||
if not self._login():
|
||||
return
|
||||
|
||||
_YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
|
||||
_DEFAULT_API_DATA = {
|
||||
'context': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20210301.08.00',
|
||||
'clientVersion': _YT_WEB_CLIENT_VERSION,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_DEFAULT_BASIC_API_HEADERS = {
|
||||
'X-YouTube-Client-Name': '1',
|
||||
'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
|
||||
}
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
@@ -301,7 +309,6 @@ def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
||||
auth = self._generate_sapisidhash_header()
|
||||
if auth is not None:
|
||||
headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
|
||||
|
||||
return self._download_json(
|
||||
'https://www.youtube.com/youtubei/v1/%s' % ep,
|
||||
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
|
||||
@@ -315,6 +322,27 @@ def _extract_yt_initial_data(self, video_id, webpage):
|
||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||
video_id)
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
return token
|
||||
return self._search_regex(
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None)
|
||||
|
||||
@staticmethod
|
||||
def _extract_account_syncid(data):
|
||||
"""Extract syncId required to download private playlists of secondary channels"""
|
||||
sync_ids = (
|
||||
try_get(data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
|
||||
or '').split("||")
|
||||
if len(sync_ids) >= 2 and sync_ids[1]:
|
||||
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
|
||||
# and just "user_syncid||" for primary channel. We only want the channel_syncid
|
||||
return sync_ids[0]
|
||||
|
||||
def _extract_ytcfg(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -1462,6 +1490,270 @@ def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
|
||||
(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
|
||||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
||||
|
||||
@staticmethod
|
||||
def _join_text_entries(runs):
|
||||
text = None
|
||||
for run in runs:
|
||||
if not isinstance(run, dict):
|
||||
continue
|
||||
sub_text = try_get(run, lambda x: x['text'], compat_str)
|
||||
if sub_text:
|
||||
if not text:
|
||||
text = sub_text
|
||||
continue
|
||||
text += sub_text
|
||||
return text
|
||||
|
||||
def _extract_comment(self, comment_renderer, parent=None):
|
||||
comment_id = comment_renderer.get('commentId')
|
||||
if not comment_id:
|
||||
return
|
||||
comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
|
||||
text = self._join_text_entries(comment_text_runs) or ''
|
||||
comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
|
||||
time_text = self._join_text_entries(comment_time_text)
|
||||
|
||||
author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
|
||||
author_id = try_get(comment_renderer,
|
||||
lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
|
||||
votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
|
||||
lambda x: x['likeCount']), compat_str)) or 0
|
||||
author_thumbnail = try_get(comment_renderer,
|
||||
lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
|
||||
|
||||
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
|
||||
is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
|
||||
|
||||
return {
|
||||
'id': comment_id,
|
||||
'text': text,
|
||||
# TODO: This should be parsed to timestamp
|
||||
'time_text': time_text,
|
||||
'like_count': votes,
|
||||
'is_favorited': is_liked,
|
||||
'author': author,
|
||||
'author_id': author_id,
|
||||
'author_thumbnail': author_thumbnail,
|
||||
'author_is_uploader': author_is_uploader,
|
||||
'parent': parent or 'root'
|
||||
}
|
||||
|
||||
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
|
||||
session_token_list, parent=None, comment_counts=None):
|
||||
|
||||
def extract_thread(parent_renderer):
|
||||
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
|
||||
if not parent:
|
||||
comment_counts[2] = 0
|
||||
for content in contents:
|
||||
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
||||
comment_renderer = try_get(
|
||||
comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
|
||||
content, (lambda x: x['commentRenderer'], dict))
|
||||
|
||||
if not comment_renderer:
|
||||
continue
|
||||
comment = self._extract_comment(comment_renderer, parent)
|
||||
if not comment:
|
||||
continue
|
||||
comment_counts[0] += 1
|
||||
yield comment
|
||||
# Attempt to get the replies
|
||||
comment_replies_renderer = try_get(
|
||||
comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
|
||||
|
||||
if comment_replies_renderer:
|
||||
comment_counts[2] += 1
|
||||
comment_entries_iter = self._comment_entries(
|
||||
comment_replies_renderer, identity_token, account_syncid,
|
||||
parent=comment.get('id'), session_token_list=session_token_list,
|
||||
comment_counts=comment_counts)
|
||||
|
||||
for reply_comment in comment_entries_iter:
|
||||
yield reply_comment
|
||||
|
||||
if not comment_counts:
|
||||
# comment so far, est. total comments, current comment thread #
|
||||
comment_counts = [0, 0, 0]
|
||||
headers = self._DEFAULT_BASIC_API_HEADERS.copy()
|
||||
|
||||
# TODO: Generalize the download code with TabIE
|
||||
if identity_token:
|
||||
headers['x-youtube-identity-token'] = identity_token
|
||||
|
||||
if account_syncid:
|
||||
headers['X-Goog-PageId'] = account_syncid
|
||||
headers['X-Goog-AuthUser'] = 0
|
||||
|
||||
continuation = YoutubeTabIE._extract_continuation(root_continuation_data) # TODO
|
||||
first_continuation = False
|
||||
if parent is None:
|
||||
first_continuation = True
|
||||
|
||||
for page_num in itertools.count(0):
|
||||
if not continuation:
|
||||
break
|
||||
retries = self._downloader.params.get('extractor_retries', 3)
|
||||
count = -1
|
||||
last_error = None
|
||||
|
||||
while count < retries:
|
||||
count += 1
|
||||
if last_error:
|
||||
self.report_warning('%s. Retrying ...' % last_error)
|
||||
try:
|
||||
query = {
|
||||
'ctoken': continuation['ctoken'],
|
||||
'pbj': 1,
|
||||
'type': 'next',
|
||||
}
|
||||
if parent:
|
||||
query['action_get_comment_replies'] = 1
|
||||
else:
|
||||
query['action_get_comments'] = 1
|
||||
|
||||
comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
|
||||
if page_num == 0:
|
||||
if first_continuation:
|
||||
note_prefix = "Downloading initial comment continuation page"
|
||||
else:
|
||||
note_prefix = " Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
|
||||
else:
|
||||
note_prefix = "%sDownloading comment%s page %d %s" % (
|
||||
" " if parent else "",
|
||||
' replies' if parent else '',
|
||||
page_num,
|
||||
comment_prog_str)
|
||||
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/comment_service_ajax', None,
|
||||
'%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
|
||||
headers=headers, query=query,
|
||||
data=urlencode_postdata({
|
||||
'session_token': session_token_list[0]
|
||||
}))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
|
||||
if e.cause.code == 413:
|
||||
self.report_warning("Assumed end of comments (received HTTP Error 413)")
|
||||
return
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||
last_error = 'HTTP Error %s' % e.cause.code
|
||||
if e.cause.code == 404:
|
||||
last_error = last_error + " (this API is probably deprecated)"
|
||||
if count < retries:
|
||||
continue
|
||||
raise
|
||||
else:
|
||||
session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
|
||||
if session_token:
|
||||
session_token_list[0] = session_token
|
||||
|
||||
response = try_get(browse,
|
||||
(lambda x: x['response'],
|
||||
lambda x: x[1]['response'])) or {}
|
||||
|
||||
if response.get('continuationContents'):
|
||||
break
|
||||
|
||||
# YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
|
||||
if browse.get('reload'):
|
||||
raise ExtractorError("Invalid or missing params in continuation request", expected=False)
|
||||
|
||||
# TODO: not tested, merged from old extractor
|
||||
err_msg = browse.get('externalErrorMessage')
|
||||
if err_msg:
|
||||
raise ExtractorError('YouTube said: %s' % err_msg, expected=False)
|
||||
|
||||
# Youtube sometimes sends incomplete data
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||
last_error = 'Incomplete data received'
|
||||
if count >= retries:
|
||||
self._downloader.report_error(last_error)
|
||||
|
||||
if not response:
|
||||
break
|
||||
|
||||
known_continuation_renderers = {
|
||||
'itemSectionContinuation': extract_thread,
|
||||
'commentRepliesContinuation': extract_thread
|
||||
}
|
||||
|
||||
# extract next root continuation from the results
|
||||
continuation_contents = try_get(
|
||||
response, lambda x: x['continuationContents'], dict) or {}
|
||||
|
||||
for key, value in continuation_contents.items():
|
||||
if key not in known_continuation_renderers:
|
||||
continue
|
||||
continuation_renderer = value
|
||||
|
||||
if first_continuation:
|
||||
first_continuation = False
|
||||
expected_comment_count = try_get(
|
||||
continuation_renderer,
|
||||
(lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
|
||||
lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
|
||||
compat_str)
|
||||
|
||||
if expected_comment_count:
|
||||
comment_counts[1] = str_to_int(expected_comment_count)
|
||||
self.to_screen("Downloading ~%d comments" % str_to_int(expected_comment_count))
|
||||
yield comment_counts[1]
|
||||
|
||||
# TODO: cli arg.
|
||||
# 1/True for newest, 0/False for popular (default)
|
||||
comment_sort_index = int(True)
|
||||
sort_continuation_renderer = try_get(
|
||||
continuation_renderer,
|
||||
lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
|
||||
[comment_sort_index]['continuation']['reloadContinuationData'], dict)
|
||||
# If this fails, the initial continuation page
|
||||
# starts off with popular anyways.
|
||||
if sort_continuation_renderer:
|
||||
continuation = YoutubeTabIE._build_continuation_query(
|
||||
continuation=sort_continuation_renderer.get('continuation'),
|
||||
ctp=sort_continuation_renderer.get('clickTrackingParams'))
|
||||
self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
|
||||
break
|
||||
|
||||
for entry in known_continuation_renderers[key](continuation_renderer):
|
||||
yield entry
|
||||
|
||||
continuation = YoutubeTabIE._extract_continuation(continuation_renderer) # TODO
|
||||
break
|
||||
|
||||
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
|
||||
"""Entry for comment extraction"""
|
||||
comments = []
|
||||
known_entry_comment_renderers = (
|
||||
'itemSectionRenderer',
|
||||
)
|
||||
estimated_total = 0
|
||||
for entry in contents:
|
||||
for key, renderer in entry.items():
|
||||
if key not in known_entry_comment_renderers:
|
||||
continue
|
||||
|
||||
comment_iter = self._comment_entries(
|
||||
renderer,
|
||||
identity_token=self._extract_identity_token(webpage, item_id=video_id),
|
||||
account_syncid=self._extract_account_syncid(ytcfg),
|
||||
session_token_list=[xsrf_token])
|
||||
|
||||
for comment in comment_iter:
|
||||
if isinstance(comment, int):
|
||||
estimated_total = comment
|
||||
continue
|
||||
comments.append(comment)
|
||||
break
|
||||
self.to_screen("Downloaded %d/%d comments" % (len(comments), estimated_total))
|
||||
return {
|
||||
'comments': comments,
|
||||
'comment_count': len(comments),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
@@ -1655,7 +1947,7 @@ def feed_entry(name):
|
||||
f['format_id'] = itag
|
||||
formats.append(f)
|
||||
|
||||
if self._downloader.params.get('youtube_include_dash_manifest'):
|
||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
dash_manifest_url = streaming_data.get('dashManifestUrl')
|
||||
if dash_manifest_url:
|
||||
for f in self._extract_mpd_formats(
|
||||
@@ -1775,7 +2067,7 @@ def feed_entry(name):
|
||||
'tags': keywords,
|
||||
'is_live': is_live,
|
||||
'playable_in_embed': playability_status.get('playableInEmbed'),
|
||||
'was_live': video_details.get('isLiveContent')
|
||||
'was_live': video_details.get('isLiveContent'),
|
||||
}
|
||||
|
||||
pctr = try_get(
|
||||
@@ -1858,6 +2150,7 @@ def process_language(container, base_url, lang_code, query):
|
||||
# This will error if there is no livechat
|
||||
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
||||
info['subtitles']['live_chat'] = [{
|
||||
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
|
||||
'video_id': video_id,
|
||||
'ext': 'json',
|
||||
'protocol': 'youtube_live_chat_replay',
|
||||
@@ -1992,6 +2285,30 @@ def chapter_time(mmlir):
|
||||
if v:
|
||||
info[d_k] = v
|
||||
|
||||
is_private = bool_or_none(video_details.get('isPrivate'))
|
||||
is_unlisted = bool_or_none(microformat.get('isUnlisted'))
|
||||
is_membersonly = None
|
||||
if initial_data and is_private is not None:
|
||||
is_membersonly = False
|
||||
contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
|
||||
for content in contents or []:
|
||||
badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
|
||||
for badge in badges or []:
|
||||
label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
|
||||
if label.lower() == 'members only':
|
||||
is_membersonly = True
|
||||
break
|
||||
if is_membersonly:
|
||||
break
|
||||
|
||||
# TODO: Add this for playlists
|
||||
info['availability'] = self._availability(
|
||||
is_private=is_private,
|
||||
needs_premium=False, # Youtube no longer have premium-only videos?
|
||||
needs_subscription=is_membersonly,
|
||||
needs_auth=info['age_limit'] >= 18,
|
||||
is_unlisted=None if is_private is None else is_unlisted)
|
||||
|
||||
# get xsrf for annotations or comments
|
||||
get_annotations = self._downloader.params.get('writeannotations', False)
|
||||
get_comments = self._downloader.params.get('getcomments', False)
|
||||
@@ -2024,156 +2341,8 @@ def chapter_time(mmlir):
|
||||
errnote='Unable to download video annotations', fatal=False,
|
||||
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
|
||||
|
||||
# Get comments
|
||||
# TODO: Refactor and move to seperate function
|
||||
def extract_comments():
|
||||
expected_video_comment_count = 0
|
||||
video_comments = []
|
||||
comment_xsrf = xsrf_token
|
||||
|
||||
def find_value(html, key, num_chars=2, separator='"'):
|
||||
pos_begin = html.find(key) + len(key) + num_chars
|
||||
pos_end = html.find(separator, pos_begin)
|
||||
return html[pos_begin: pos_end]
|
||||
|
||||
def search_dict(partial, key):
|
||||
if isinstance(partial, dict):
|
||||
for k, v in partial.items():
|
||||
if k == key:
|
||||
yield v
|
||||
else:
|
||||
for o in search_dict(v, key):
|
||||
yield o
|
||||
elif isinstance(partial, list):
|
||||
for i in partial:
|
||||
for o in search_dict(i, key):
|
||||
yield o
|
||||
|
||||
continuations = []
|
||||
if initial_data:
|
||||
try:
|
||||
ncd = next(search_dict(initial_data, 'nextContinuationData'))
|
||||
continuations = [ncd['continuation']]
|
||||
# Handle videos where comments have been disabled entirely
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
def get_continuation(continuation, session_token, replies=False):
|
||||
query = {
|
||||
'pbj': 1,
|
||||
'ctoken': continuation,
|
||||
}
|
||||
if replies:
|
||||
query['action_get_comment_replies'] = 1
|
||||
else:
|
||||
query['action_get_comments'] = 1
|
||||
|
||||
while True:
|
||||
content, handle = self._download_webpage_handle(
|
||||
'https://www.youtube.com/comment_service_ajax',
|
||||
video_id,
|
||||
note=False,
|
||||
expected_status=[413],
|
||||
data=urlencode_postdata({
|
||||
'session_token': session_token
|
||||
}),
|
||||
query=query,
|
||||
headers={
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
|
||||
'X-YouTube-Client-Name': '1',
|
||||
'X-YouTube-Client-Version': '2.20201202.06.01'
|
||||
}
|
||||
)
|
||||
|
||||
response_code = handle.getcode()
|
||||
if (response_code == 200):
|
||||
return self._parse_json(content, video_id)
|
||||
if (response_code == 413):
|
||||
return None
|
||||
raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
|
||||
|
||||
first_continuation = True
|
||||
chain_msg = ''
|
||||
self.to_screen('Downloading comments')
|
||||
while continuations:
|
||||
continuation = continuations.pop()
|
||||
comment_response = get_continuation(continuation, comment_xsrf)
|
||||
if not comment_response:
|
||||
continue
|
||||
if list(search_dict(comment_response, 'externalErrorMessage')):
|
||||
raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
|
||||
|
||||
if 'continuationContents' not in comment_response['response']:
|
||||
# Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
|
||||
continue
|
||||
# not sure if this actually helps
|
||||
if 'xsrf_token' in comment_response:
|
||||
comment_xsrf = comment_response['xsrf_token']
|
||||
|
||||
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
|
||||
if first_continuation:
|
||||
expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
|
||||
first_continuation = False
|
||||
if 'contents' not in item_section:
|
||||
# continuation returned no comments?
|
||||
# set an empty array as to not break the for loop
|
||||
item_section['contents'] = []
|
||||
|
||||
for meta_comment in item_section['contents']:
|
||||
comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
|
||||
video_comments.append({
|
||||
'id': comment['commentId'],
|
||||
'text': ''.join([c['text'] for c in try_get(comment, lambda x: x['contentText']['runs'], list) or []]),
|
||||
'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
|
||||
'author': comment.get('authorText', {}).get('simpleText', ''),
|
||||
'votes': comment.get('voteCount', {}).get('simpleText', '0'),
|
||||
'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
|
||||
'parent': 'root'
|
||||
})
|
||||
if 'replies' not in meta_comment['commentThreadRenderer']:
|
||||
continue
|
||||
|
||||
reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
|
||||
while reply_continuations:
|
||||
time.sleep(1)
|
||||
continuation = reply_continuations.pop()
|
||||
replies_data = get_continuation(continuation, comment_xsrf, True)
|
||||
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
|
||||
continue
|
||||
|
||||
if self._downloader.params.get('verbose', False):
|
||||
chain_msg = ' (chain %s)' % comment['commentId']
|
||||
self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
|
||||
reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
|
||||
for reply_meta in reply_comment_meta.get('contents', {}):
|
||||
reply_comment = reply_meta['commentRenderer']
|
||||
video_comments.append({
|
||||
'id': reply_comment['commentId'],
|
||||
'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
|
||||
'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
|
||||
'author': reply_comment.get('authorText', {}).get('simpleText', ''),
|
||||
'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
|
||||
'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
|
||||
'parent': comment['commentId']
|
||||
})
|
||||
if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
|
||||
continue
|
||||
reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
|
||||
|
||||
self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
|
||||
if 'continuations' in item_section:
|
||||
continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
|
||||
time.sleep(1)
|
||||
|
||||
self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
|
||||
return {
|
||||
'comments': video_comments,
|
||||
'comment_count': expected_video_comment_count
|
||||
}
|
||||
|
||||
if get_comments:
|
||||
info['__post_extractor'] = extract_comments
|
||||
info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
|
||||
|
||||
self.mark_watched(video_id, player_response)
|
||||
|
||||
@@ -2520,17 +2689,22 @@ def _extract_channel_id(self, webpage):
|
||||
channel_url, 'channel id')
|
||||
|
||||
@staticmethod
|
||||
def _extract_grid_item_renderer(item):
|
||||
for item_kind in ('Playlist', 'Video', 'Channel'):
|
||||
renderer = item.get('grid%sRenderer' % item_kind)
|
||||
if renderer:
|
||||
return renderer
|
||||
def _extract_basic_item_renderer(item):
|
||||
# Modified from _extract_grid_item_renderer
|
||||
known_renderers = (
|
||||
'playlistRenderer', 'videoRenderer', 'channelRenderer',
|
||||
'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
|
||||
)
|
||||
for key, renderer in item.items():
|
||||
if key not in known_renderers:
|
||||
continue
|
||||
return renderer
|
||||
|
||||
def _grid_entries(self, grid_renderer):
|
||||
for item in grid_renderer['items']:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
renderer = self._extract_grid_item_renderer(item)
|
||||
renderer = self._extract_basic_item_renderer(item)
|
||||
if not isinstance(renderer, dict):
|
||||
continue
|
||||
title = try_get(
|
||||
@@ -2559,7 +2733,7 @@ def _shelf_entries_from_content(self, shelf_renderer):
|
||||
content = shelf_renderer.get('content')
|
||||
if not isinstance(content, dict):
|
||||
return
|
||||
renderer = content.get('gridRenderer')
|
||||
renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
|
||||
if renderer:
|
||||
# TODO: add support for nested playlists so each shelf is processed
|
||||
# as separate playlist
|
||||
@@ -2601,20 +2775,6 @@ def _playlist_entries(self, video_list_renderer):
|
||||
continue
|
||||
yield self._extract_video(renderer)
|
||||
|
||||
r""" # Not needed in the new implementation
|
||||
def _itemSection_entries(self, item_sect_renderer):
|
||||
for content in item_sect_renderer['contents']:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
renderer = content.get('videoRenderer', {})
|
||||
if not isinstance(renderer, dict):
|
||||
continue
|
||||
video_id = renderer.get('videoId')
|
||||
if not video_id:
|
||||
continue
|
||||
yield self._extract_video(renderer)
|
||||
"""
|
||||
|
||||
def _rich_entries(self, rich_grid_renderer):
|
||||
renderer = try_get(
|
||||
rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
|
||||
@@ -2713,7 +2873,7 @@ def _extract_continuation(cls, renderer):
|
||||
ctp = continuation_ep.get('clickTrackingParams')
|
||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
||||
|
||||
def _entries(self, tab, identity_token, item_id):
|
||||
def _entries(self, tab, item_id, identity_token, account_syncid):
|
||||
|
||||
def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
|
||||
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
|
||||
@@ -2773,6 +2933,10 @@ def extract_entries(parent_renderer): # this needs to called again for continua
|
||||
if identity_token:
|
||||
headers['x-youtube-identity-token'] = identity_token
|
||||
|
||||
if account_syncid:
|
||||
headers['X-Goog-PageId'] = account_syncid
|
||||
headers['X-Goog-AuthUser'] = 0
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
if not continuation:
|
||||
break
|
||||
@@ -2803,9 +2967,14 @@ def extract_entries(parent_renderer): # this needs to called again for continua
|
||||
else:
|
||||
# Youtube sometimes sends incomplete data
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||
if response.get('continuationContents') or response.get('onResponseReceivedActions'):
|
||||
if dict_get(response,
|
||||
('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
|
||||
break
|
||||
last_error = 'Incomplete data recieved'
|
||||
|
||||
# Youtube may send alerts if there was an issue with the continuation page
|
||||
self._extract_alerts(response, expected=False)
|
||||
|
||||
last_error = 'Incomplete data received'
|
||||
if count >= retries:
|
||||
self._downloader.report_error(last_error)
|
||||
|
||||
@@ -2837,11 +3006,13 @@ def extract_entries(parent_renderer): # this needs to called again for continua
|
||||
'gridPlaylistRenderer': (self._grid_entries, 'items'),
|
||||
'gridVideoRenderer': (self._grid_entries, 'items'),
|
||||
'playlistVideoRenderer': (self._playlist_entries, 'contents'),
|
||||
'itemSectionRenderer': (self._playlist_entries, 'contents'),
|
||||
'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
|
||||
'richItemRenderer': (extract_entries, 'contents'), # for hashtag
|
||||
'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
|
||||
}
|
||||
continuation_items = try_get(
|
||||
response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
|
||||
response,
|
||||
lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
|
||||
continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
|
||||
video_items_renderer = None
|
||||
for key, value in continuation_item.items():
|
||||
@@ -2888,7 +3059,7 @@ def _extract_uploader(data):
|
||||
try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
|
||||
return {k: v for k, v in uploader.items() if v is not None}
|
||||
|
||||
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
||||
playlist_id = title = description = channel_url = channel_name = channel_id = None
|
||||
thumbnails_list = tags = []
|
||||
|
||||
@@ -2952,16 +3123,41 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||
'channel_id': metadata['uploader_id'],
|
||||
'channel_url': metadata['uploader_url']})
|
||||
return self.playlist_result(
|
||||
self._entries(selected_tab, identity_token, playlist_id),
|
||||
self._entries(
|
||||
selected_tab, playlist_id,
|
||||
self._extract_identity_token(webpage, item_id),
|
||||
self._extract_account_syncid(data)),
|
||||
**metadata)
|
||||
|
||||
def _extract_mix_playlist(self, playlist, playlist_id):
|
||||
first_id = last_id = None
|
||||
for page_num in itertools.count(1):
|
||||
videos = list(self._playlist_entries(playlist))
|
||||
if not videos:
|
||||
return
|
||||
start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
|
||||
if start >= len(videos):
|
||||
return
|
||||
for video in videos[start:]:
|
||||
if video['id'] == first_id:
|
||||
self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
|
||||
return
|
||||
yield video
|
||||
first_id = first_id or videos[0]['id']
|
||||
last_id = videos[-1]['id']
|
||||
|
||||
_, data = self._extract_webpage(
|
||||
'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id),
|
||||
'%s page %d' % (playlist_id, page_num))
|
||||
playlist = try_get(
|
||||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
||||
|
||||
def _extract_from_playlist(self, item_id, url, data, playlist):
|
||||
title = playlist.get('title') or try_get(
|
||||
data, lambda x: x['titleText']['simpleText'], compat_str)
|
||||
playlist_id = playlist.get('playlistId') or item_id
|
||||
# Inline playlist rendition continuation does not always work
|
||||
# at Youtube side, so delegating regular tab-based playlist URL
|
||||
# processing whenever possible.
|
||||
|
||||
# Delegating everything except mix playlists to regular tab-based playlist URL
|
||||
playlist_url = urljoin(url, try_get(
|
||||
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
||||
compat_str))
|
||||
@@ -2969,67 +3165,42 @@ def _extract_from_playlist(self, item_id, url, data, playlist):
|
||||
return self.url_result(
|
||||
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||
video_title=title)
|
||||
return self.playlist_result(
|
||||
self._playlist_entries(playlist), playlist_id=playlist_id,
|
||||
playlist_title=title)
|
||||
|
||||
@staticmethod
|
||||
def _extract_alerts(data):
|
||||
for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
|
||||
if not isinstance(alert_dict, dict):
|
||||
continue
|
||||
for renderer in alert_dict:
|
||||
alert = alert_dict[renderer]
|
||||
alert_type = alert.get('type')
|
||||
if not alert_type:
|
||||
return self.playlist_result(
|
||||
self._extract_mix_playlist(playlist, playlist_id),
|
||||
playlist_id=playlist_id, playlist_title=title)
|
||||
|
||||
def _extract_alerts(self, data, expected=False):
|
||||
|
||||
def _real_extract_alerts():
|
||||
for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
|
||||
if not isinstance(alert_dict, dict):
|
||||
continue
|
||||
message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
|
||||
if message:
|
||||
yield alert_type, message
|
||||
for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
|
||||
message = try_get(run, lambda x: x['text'], compat_str)
|
||||
for alert in alert_dict.values():
|
||||
alert_type = alert.get('type')
|
||||
if not alert_type:
|
||||
continue
|
||||
message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
|
||||
if message:
|
||||
yield alert_type, message
|
||||
for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
|
||||
message = try_get(run, lambda x: x['text'], compat_str)
|
||||
if message:
|
||||
yield alert_type, message
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
return token
|
||||
return self._search_regex(
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
url = compat_urlparse.urlunparse(
|
||||
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
|
||||
is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
|
||||
if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
|
||||
self._downloader.report_warning(
|
||||
'A channel/user page was given. All the channel\'s videos will be downloaded. '
|
||||
'To download only the videos in the home page, add a "/featured" to the URL')
|
||||
url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')
|
||||
|
||||
# Handle both video/playlist URLs
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('v', [None])[0]
|
||||
playlist_id = qs.get('list', [None])[0]
|
||||
|
||||
if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
|
||||
if playlist_id:
|
||||
self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
|
||||
url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
|
||||
# return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
|
||||
err_msg = None
|
||||
for alert_type, alert_message in _real_extract_alerts():
|
||||
if alert_type.lower() == 'error':
|
||||
if err_msg:
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
|
||||
err_msg = alert_message
|
||||
else:
|
||||
raise ExtractorError('Unable to recognize tab page')
|
||||
if video_id and playlist_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
|
||||
|
||||
if err_msg:
|
||||
raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
|
||||
|
||||
def _extract_webpage(self, url, item_id):
|
||||
retries = self._downloader.params.get('extractor_retries', 3)
|
||||
count = -1
|
||||
last_error = 'Incomplete yt initial data recieved'
|
||||
@@ -3041,40 +3212,67 @@ def _real_extract(self, url):
|
||||
self.report_warning('%s. Retrying ...' % last_error)
|
||||
webpage = self._download_webpage(
|
||||
url, item_id,
|
||||
'Downloading webpage%s' % ' (retry #%d)' % count if count else '')
|
||||
identity_token = self._extract_identity_token(webpage, item_id)
|
||||
'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
|
||||
data = self._extract_yt_initial_data(item_id, webpage)
|
||||
err_msg = None
|
||||
for alert_type, alert_message in self._extract_alerts(data):
|
||||
if alert_type.lower() == 'error':
|
||||
if err_msg:
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
|
||||
err_msg = alert_message
|
||||
else:
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
|
||||
if err_msg:
|
||||
raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
|
||||
self._extract_alerts(data, expected=True)
|
||||
if data.get('contents') or data.get('currentVideoEndpoint'):
|
||||
break
|
||||
if count >= retries:
|
||||
self._downloader.report_error(last_error)
|
||||
return webpage, data
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
url = compat_urlparse.urlunparse(
|
||||
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
|
||||
|
||||
# This is not matched in a channel page with a tab selected
|
||||
mobj = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
|
||||
mobj = mobj.groupdict() if mobj else {}
|
||||
if mobj and not mobj.get('not_channel'):
|
||||
self._downloader.report_warning(
|
||||
'A channel/user page was given. All the channel\'s videos will be downloaded. '
|
||||
'To download only the videos in the home page, add a "/featured" to the URL')
|
||||
url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')
|
||||
|
||||
# Handle both video/playlist URLs
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('v', [None])[0]
|
||||
playlist_id = qs.get('list', [None])[0]
|
||||
|
||||
if not video_id and (mobj.get('not_channel') or '').startswith('watch'):
|
||||
if not playlist_id:
|
||||
# If there is neither video or playlist ids,
|
||||
# youtube redirects to home page, which is undesirable
|
||||
raise ExtractorError('Unable to recognize tab page')
|
||||
self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
|
||||
url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
|
||||
|
||||
if video_id and playlist_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
webpage, data = self._extract_webpage(url, item_id)
|
||||
|
||||
tabs = try_get(
|
||||
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
|
||||
if tabs:
|
||||
return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
|
||||
return self._extract_from_tabs(item_id, webpage, data, tabs)
|
||||
|
||||
playlist = try_get(
|
||||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
||||
if playlist:
|
||||
return self._extract_from_playlist(item_id, url, data, playlist)
|
||||
# Fallback to video extraction if no playlist alike page is recognized.
|
||||
# First check for the current video then try the v attribute of URL query.
|
||||
|
||||
video_id = try_get(
|
||||
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
|
||||
compat_str) or video_id
|
||||
if video_id:
|
||||
self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
# Failed to recognize
|
||||
|
||||
raise ExtractorError('Unable to recognize tab page')
|
||||
|
||||
|
||||
@@ -3338,7 +3536,6 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):
|
||||
Subclasses must define the _FEED_NAME property.
|
||||
"""
|
||||
_LOGIN_REQUIRED = True
|
||||
# _MAX_PAGES = 5
|
||||
_TESTS = []
|
||||
|
||||
@property
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
@@ -16,24 +17,34 @@
|
||||
|
||||
|
||||
class Zee5IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?zee5\.com/[^#?]*/(?P<display_id>[-\w]+)/(?P<id>[-\d]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
zee5:|
|
||||
(?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
|
||||
(?:
|
||||
(?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
|
||||
|movies/[^#/?]+
|
||||
)/(?P<display_id>[^#/?]+)/
|
||||
)
|
||||
(?P<id>[^#/?]+)/?(?:$|[?#])
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.zee5.com/movies/details/krishna-the-birth/0-0-63098',
|
||||
'info_dict': {
|
||||
"id": "0-0-63098",
|
||||
"ext": "m3u8",
|
||||
"display_id": "krishna-the-birth",
|
||||
"title": "Krishna - The Birth",
|
||||
"duration": 4368,
|
||||
"average_rating": 4,
|
||||
"description": str,
|
||||
"alt_title": "Krishna - The Birth",
|
||||
"uploader": "Zee Entertainment Enterprises Ltd",
|
||||
"release_date": "20060101",
|
||||
"upload_date": "20060101",
|
||||
"timestamp": 1136073600,
|
||||
"thumbnail": "https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg",
|
||||
"tags": list
|
||||
'id': '0-0-63098',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'krishna-the-birth',
|
||||
'title': 'Krishna - The Birth',
|
||||
'duration': 4368,
|
||||
'average_rating': 4,
|
||||
'description': str,
|
||||
'alt_title': 'Krishna - The Birth',
|
||||
'uploader': 'Zee Entertainment Enterprises Ltd',
|
||||
'release_date': '20060101',
|
||||
'upload_date': '20060101',
|
||||
'timestamp': 1136073600,
|
||||
'thumbnail': 'https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg',
|
||||
'tags': list
|
||||
},
|
||||
'params': {
|
||||
'format': 'bv',
|
||||
@@ -41,37 +52,43 @@ class Zee5IE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://zee5.com/tvshows/details/krishna-balram/0-6-1871/episode-1-the-test-of-bramha/0-1-233402',
|
||||
'info_dict': {
|
||||
"id": "0-1-233402",
|
||||
'ext': 'm3u8',
|
||||
"display_id": "episode-1-the-test-of-bramha",
|
||||
"title": "Episode 1 - The Test Of Bramha",
|
||||
"duration": 1336,
|
||||
"average_rating": 4,
|
||||
"description": str,
|
||||
"alt_title": "Episode 1 - The Test Of Bramha",
|
||||
"uploader": "Green Gold",
|
||||
"release_date": "20090101",
|
||||
"upload_date": "20090101",
|
||||
"timestamp": 1230768000,
|
||||
"thumbnail": "https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg",
|
||||
"series": "Krishna Balram",
|
||||
"season_number": 1,
|
||||
"episode_number": 1,
|
||||
"tags": list,
|
||||
'id': '0-1-233402',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'episode-1-the-test-of-bramha',
|
||||
'title': 'Episode 1 - The Test Of Bramha',
|
||||
'duration': 1336,
|
||||
'average_rating': 4,
|
||||
'description': str,
|
||||
'alt_title': 'Episode 1 - The Test Of Bramha',
|
||||
'uploader': 'Green Gold',
|
||||
'release_date': '20090101',
|
||||
'upload_date': '20090101',
|
||||
'timestamp': 1230768000,
|
||||
'thumbnail': 'https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg',
|
||||
'series': 'Krishna Balram',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bv',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/global/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).group('id', 'display_id')
|
||||
access_token_request = self._download_json(
|
||||
'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
|
||||
video_id, note="Downloading access token")
|
||||
video_id, note='Downloading access token')
|
||||
token_request = self._download_json(
|
||||
'https://useraction.zee5.com/tokennd',
|
||||
video_id, note="Downloading video token")
|
||||
video_id, note='Downloading video token')
|
||||
json_data = self._download_json(
|
||||
'https://gwapi.zee5.com/content/details/{}?translation=en&country=IN'.format(video_id),
|
||||
video_id, headers={'X-Access-Token': access_token_request['token']})
|
||||
@@ -111,3 +128,78 @@ def _real_extract(self, url):
|
||||
'episode_number': int_or_none(try_get(json_data, lambda x: x['index'])),
|
||||
'tags': try_get(json_data, lambda x: x['tags'], list)
|
||||
}
|
||||
|
||||
|
||||
class Zee5SeriesIE(InfoExtractor):
|
||||
IE_NAME = 'zee5:series'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
zee5:series:|
|
||||
(?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
|
||||
(?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
|
||||
)
|
||||
(?P<id>[^#/?]+)/?(?:$|[?#])
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',
|
||||
'playlist_mincount': 43,
|
||||
'info_dict': {
|
||||
'id': '0-6-1871',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/tvshows/details/bhabi-ji-ghar-par-hai/0-6-199',
|
||||
'playlist_mincount': 1500,
|
||||
'info_dict': {
|
||||
'id': '0-6-199',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/tvshows/details/agent-raghav-crime-branch/0-6-965',
|
||||
'playlist_mincount': 25,
|
||||
'info_dict': {
|
||||
'id': '0-6-965',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/ta/tvshows/details/nagabhairavi/0-6-3201',
|
||||
'playlist_mincount': 3,
|
||||
'info_dict': {
|
||||
'id': '0-6-3201',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zee5.com/global/hi/tvshows/details/khwaabon-ki-zamin-par/0-6-270',
|
||||
'playlist_mincount': 150,
|
||||
'info_dict': {
|
||||
'id': '0-6-270',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _entries(self, show_id):
|
||||
access_token_request = self._download_json(
|
||||
'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
|
||||
show_id, note='Downloading access token')
|
||||
headers = {
|
||||
'X-Access-Token': access_token_request['token'],
|
||||
'Referer': 'https://www.zee5.com/',
|
||||
}
|
||||
show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)
|
||||
|
||||
page_num = 0
|
||||
show_json = self._download_json(show_url, video_id=show_id, headers=headers)
|
||||
for season in show_json.get('seasons') or []:
|
||||
season_id = try_get(season, lambda x: x['id'], compat_str)
|
||||
next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)
|
||||
while next_url:
|
||||
page_num += 1
|
||||
episodes_json = self._download_json(
|
||||
next_url, video_id=show_id, headers=headers,
|
||||
note='Downloading JSON metadata page %d' % page_num)
|
||||
for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
|
||||
video_id = episode.get('id')
|
||||
yield self.url_result(
|
||||
'zee5:%s' % video_id,
|
||||
ie=Zee5IE.ie_key(), video_id=video_id)
|
||||
next_url = url_or_none(episodes_json.get('next_episode_api'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
|
||||
|
||||
@@ -1,93 +1,94 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class ZingMp3BaseInfoExtractor(InfoExtractor):
|
||||
class ZingMp3BaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?:%s)/[^/]+/(?P<id>\w+)\.html'
|
||||
_GEO_COUNTRIES = ['VN']
|
||||
|
||||
def _extract_item(self, item, page_type, fatal=True):
|
||||
error_message = item.get('msg')
|
||||
if error_message:
|
||||
if not fatal:
|
||||
return
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message),
|
||||
expected=True)
|
||||
def _extract_item(self, item, fatal):
|
||||
item_id = item['id']
|
||||
title = item.get('name') or item['title']
|
||||
|
||||
formats = []
|
||||
for quality, source_url in zip(item.get('qualities') or item.get('quality', []), item.get('source_list') or item.get('source', [])):
|
||||
if not source_url or source_url == 'require vip':
|
||||
for k, v in (item.get('source') or {}).items():
|
||||
if not v:
|
||||
continue
|
||||
if not re.match(r'https?://', source_url):
|
||||
source_url = '//' + source_url
|
||||
source_url = self._proto_relative_url(source_url, 'http:')
|
||||
quality_num = int_or_none(quality)
|
||||
f = {
|
||||
'format_id': quality,
|
||||
'url': source_url,
|
||||
}
|
||||
if page_type == 'video':
|
||||
f.update({
|
||||
'height': quality_num,
|
||||
'ext': 'mp4',
|
||||
})
|
||||
if k in ('mp4', 'hls'):
|
||||
for res, video_url in v.items():
|
||||
if not video_url:
|
||||
continue
|
||||
if k == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, item_id, 'mp4',
|
||||
'm3u8_native', m3u8_id=k, fatal=False))
|
||||
elif k == 'mp4':
|
||||
formats.append({
|
||||
'format_id': 'mp4-' + res,
|
||||
'url': video_url,
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'^(\d+)p', res, 'resolution', default=None)),
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'abr': quality_num,
|
||||
formats.append({
|
||||
'ext': 'mp3',
|
||||
'format_id': k,
|
||||
'tbr': int_or_none(k),
|
||||
'url': self._proto_relative_url(v),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
if not fatal:
|
||||
return
|
||||
msg = item['msg']
|
||||
if msg == 'Sorry, this content is not available in your country.':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
cover = item.get('cover')
|
||||
subtitles = None
|
||||
lyric = item.get('lyric')
|
||||
if lyric:
|
||||
subtitles = {
|
||||
'origin': [{
|
||||
'url': lyric,
|
||||
}],
|
||||
}
|
||||
|
||||
album = item.get('album') or {}
|
||||
|
||||
return {
|
||||
'title': (item.get('name') or item.get('title')).strip(),
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': 'http:/' + cover if cover else None,
|
||||
'artist': item.get('artist'),
|
||||
'thumbnail': item.get('thumbnail'),
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
'track': title,
|
||||
'artist': item.get('artists_names'),
|
||||
'album': album.get('name') or album.get('title'),
|
||||
'album_artist': album.get('artists_names'),
|
||||
}
|
||||
|
||||
def _extract_player_json(self, player_json_url, id, page_type, playlist_title=None):
|
||||
player_json = self._download_json(player_json_url, id, 'Downloading Player JSON')
|
||||
items = player_json['data']
|
||||
if 'item' in items:
|
||||
items = items['item']
|
||||
|
||||
if len(items) == 1:
|
||||
# one single song
|
||||
data = self._extract_item(items[0], page_type)
|
||||
data['id'] = id
|
||||
|
||||
return data
|
||||
else:
|
||||
# playlist of songs
|
||||
entries = []
|
||||
|
||||
for i, item in enumerate(items, 1):
|
||||
entry = self._extract_item(item, page_type, fatal=False)
|
||||
if not entry:
|
||||
continue
|
||||
entry['id'] = '%s-%d' % (id, i)
|
||||
entries.append(entry)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': id,
|
||||
'title': playlist_title,
|
||||
'entries': entries,
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url.replace('://zingmp3.vn/', '://mp3.zing.vn/'),
|
||||
page_id, query={'play_song': 1})
|
||||
data_path = self._search_regex(
|
||||
r'data-xml="([^"]+)', webpage, 'data path')
|
||||
return self._process_data(self._download_json(
|
||||
'https://mp3.zing.vn/xhr' + data_path, page_id)['data'])
|
||||
|
||||
|
||||
class ZingMp3IE(ZingMp3BaseInfoExtractor):
|
||||
_VALID_URL = r'https?://mp3\.zing\.vn/(?:bai-hat|album|playlist|video-clip)/[^/]+/(?P<id>\w+)\.html'
|
||||
class ZingMp3IE(ZingMp3BaseIE):
|
||||
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip'
|
||||
_TESTS = [{
|
||||
'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
|
||||
'md5': 'ead7ae13693b3205cbc89536a077daed',
|
||||
@@ -95,49 +96,66 @@ class ZingMp3IE(ZingMp3BaseInfoExtractor):
|
||||
'id': 'ZWZB9WAB',
|
||||
'title': 'Xa Mãi Xa',
|
||||
'ext': 'mp3',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'subtitles': {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
}]
|
||||
},
|
||||
'duration': 255,
|
||||
'track': 'Xa Mãi Xa',
|
||||
'artist': 'Bảo Thy',
|
||||
'album': 'Special Album',
|
||||
'album_artist': 'Bảo Thy',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html',
|
||||
'md5': '870295a9cd8045c0e15663565902618d',
|
||||
'url': 'https://mp3.zing.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
|
||||
'md5': 'e9c972b693aa88301ef981c8151c4343',
|
||||
'info_dict': {
|
||||
'id': 'ZW6BAEA0',
|
||||
'title': 'Let It Go (Frozen OST)',
|
||||
'id': 'ZO8ZF7C7',
|
||||
'title': 'Sương Hoa Đưa Lối',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'duration': 207,
|
||||
'track': 'Sương Hoa Đưa Lối',
|
||||
'artist': 'K-ICM, RYO',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ZWZBWDAF',
|
||||
'title': 'Lâu Đài Tình Ái - Bằng Kiều,Minh Tuyết | Album 320 lossless',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
'skip': 'removed at the request of the owner',
|
||||
}, {
|
||||
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
|
||||
'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
IE_NAME = 'zingmp3'
|
||||
IE_DESC = 'mp3.zing.vn'
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
def _process_data(self, data):
|
||||
return self._extract_item(data, True)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
player_json_url = self._search_regex([
|
||||
r'data-xml="([^"]+)',
|
||||
r'&xmlURL=([^&]+)&'
|
||||
], webpage, 'player xml url')
|
||||
class ZingMp3AlbumIE(ZingMp3BaseIE):
|
||||
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
|
||||
_TESTS = [{
|
||||
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ZWZBWDAF',
|
||||
'title': 'Lâu Đài Tình Ái',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zingmp3.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
IE_NAME = 'zingmp3:album'
|
||||
|
||||
playlist_title = None
|
||||
page_type = self._search_regex(r'/(?:html5)?xml/([^/-]+)', player_json_url, 'page type')
|
||||
if page_type == 'video':
|
||||
player_json_url = update_url_query(player_json_url, {'format': 'json'})
|
||||
else:
|
||||
player_json_url = player_json_url.replace('/xml/', '/html5xml/')
|
||||
if page_type == 'album':
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
|
||||
return self._extract_player_json(player_json_url, page_id, page_type, playlist_title)
|
||||
def _process_data(self, data):
|
||||
def entries():
|
||||
for item in (data.get('items') or []):
|
||||
entry = self._extract_item(item, False)
|
||||
if entry:
|
||||
yield entry
|
||||
info = data.get('info') or {}
|
||||
return self.playlist_result(
|
||||
entries(), info.get('id'), info.get('name') or info.get('title'))
|
||||
|
||||
@@ -1,82 +1,68 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
js_to_json,
|
||||
parse_filesize,
|
||||
urlencode_postdata
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ZoomIE(InfoExtractor):
|
||||
IE_NAME = 'zoom'
|
||||
_VALID_URL = r'https://(?:.*).?zoom.us/rec(?:ording)?/(play|share)/(?P<id>[A-Za-z0-9\-_.]+)'
|
||||
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
|
||||
'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
|
||||
'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
|
||||
'info_dict': {
|
||||
'md5': '031a5b379f1547a8b29c5c4c837dccf2',
|
||||
'title': "GAZ Transformational Tuesdays W/ Landon & Stapes",
|
||||
'id': "SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK",
|
||||
'ext': "mp4"
|
||||
'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
|
||||
'ext': 'mp4',
|
||||
'title': 'China\'s "two sessions" and the new five-year plan',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
base_url, play_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, play_id)
|
||||
|
||||
password_protected = self._search_regex(r'<form[^>]+?id="(password_form)"', webpage, 'password field', fatal=False, default=None)
|
||||
if password_protected is not None:
|
||||
self._verify_video_password(url, display_id, webpage)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
try:
|
||||
form = self._form_hidden_inputs('password_form', webpage)
|
||||
except ExtractorError:
|
||||
form = None
|
||||
if form:
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError(
|
||||
'This video is protected by a passcode, use the --video-password option', expected=True)
|
||||
is_meeting = form.get('useWhichPasswd') == 'meeting'
|
||||
validation = self._download_json(
|
||||
base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
|
||||
play_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
|
||||
'id': form[('meet' if is_meeting else 'file') + 'Id'],
|
||||
'passwd': password,
|
||||
'action': form.get('action'),
|
||||
}))
|
||||
if not validation.get('status'):
|
||||
raise ExtractorError(validation['errorMessage'], expected=True)
|
||||
webpage = self._download_webpage(url, play_id)
|
||||
|
||||
video_url = self._search_regex(r"viewMp4Url: \'(.*)\'", webpage, 'video url')
|
||||
title = self._html_search_regex([r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"], webpage, 'title')
|
||||
viewResolvtionsWidth = self._search_regex(r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
|
||||
viewResolvtionsHeight = self._search_regex(r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
|
||||
fileSize = parse_filesize(self._search_regex(r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))
|
||||
|
||||
urlprefix = url.split("zoom.us")[0] + "zoom.us/"
|
||||
|
||||
formats = []
|
||||
formats.append({
|
||||
'url': url_or_none(video_url),
|
||||
'width': int_or_none(viewResolvtionsWidth),
|
||||
'height': int_or_none(viewResolvtionsHeight),
|
||||
'http_headers': {'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
|
||||
'Referer': urlprefix},
|
||||
'ext': "mp4",
|
||||
'filesize_approx': int_or_none(fileSize)
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'(?s)window\.__data__\s*=\s*({.+?});',
|
||||
webpage, 'data'), play_id, js_to_json)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': title,
|
||||
'formats': formats
|
||||
'id': play_id,
|
||||
'title': data['topic'],
|
||||
'url': data['viewMp4Url'],
|
||||
'width': int_or_none(data.get('viewResolvtionsWidth')),
|
||||
'height': int_or_none(data.get('viewResolvtionsHeight')),
|
||||
'http_headers': {
|
||||
'Referer': base_url,
|
||||
},
|
||||
'filesize_approx': parse_filesize(data.get('fileSize')),
|
||||
}
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
meetId = self._search_regex(r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
|
||||
data = urlencode_postdata({
|
||||
'id': meetId,
|
||||
'passwd': password,
|
||||
'action': "viewdetailedpage",
|
||||
'recaptcha': ""
|
||||
})
|
||||
validation_url = url.split("zoom.us")[0] + "zoom.us/rec/validate_meet_passwd"
|
||||
validation_response = self._download_json(
|
||||
validation_url, video_id,
|
||||
note='Validating Password...',
|
||||
errnote='Wrong password?',
|
||||
data=data)
|
||||
|
||||
if validation_response['errorCode'] != 0:
|
||||
raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, validation_response['errorMessage']))
|
||||
|
||||
@@ -214,12 +214,11 @@ def _dict_from_multiple_values_options_callback(
|
||||
help='Mark videos watched (YouTube only)')
|
||||
general.add_option(
|
||||
'--no-mark-watched',
|
||||
action='store_false', dest='mark_watched', default=False,
|
||||
help='Do not mark videos watched')
|
||||
action='store_false', dest='mark_watched',
|
||||
help='Do not mark videos watched (default)')
|
||||
general.add_option(
|
||||
'--no-colors',
|
||||
action='store_true', dest='no_color',
|
||||
default=False,
|
||||
action='store_true', dest='no_color', default=False,
|
||||
help='Do not emit color codes in output')
|
||||
|
||||
network = optparse.OptionGroup(parser, 'Network Options')
|
||||
@@ -534,11 +533,11 @@ def _dict_from_multiple_values_options_callback(
|
||||
subtitles.add_option(
|
||||
'--write-auto-subs', '--write-automatic-subs',
|
||||
action='store_true', dest='writeautomaticsub', default=False,
|
||||
help='Write automatically generated subtitle file (YouTube only)')
|
||||
help='Write automatically generated subtitle file (Alias: --write-automatic-subs)')
|
||||
subtitles.add_option(
|
||||
'--no-write-auto-subs', '--no-write-automatic-subs',
|
||||
action='store_false', dest='writeautomaticsub', default=False,
|
||||
help='Do not write automatically generated subtitle file (default)')
|
||||
help='Do not write auto-generated subtitles (default) (Alias: --no-write-automatic-subs)')
|
||||
subtitles.add_option(
|
||||
'--all-subs',
|
||||
action='store_true', dest='allsubtitles', default=False,
|
||||
@@ -552,12 +551,16 @@ def _dict_from_multiple_values_options_callback(
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
|
||||
help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
|
||||
subtitles.add_option(
|
||||
'--sub-lang', '--sub-langs', '--srt-lang',
|
||||
'--sub-langs', '--srt-langs',
|
||||
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
|
||||
default=[], callback=_comma_separated_values_options_callback,
|
||||
help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
|
||||
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
downloader.add_option(
|
||||
'-N', '--concurrent-fragments',
|
||||
dest='concurrent_fragment_downloads', metavar='N', default=1, type=int,
|
||||
help='Number of fragments to download concurrently (default is %default)')
|
||||
downloader.add_option(
|
||||
'-r', '--limit-rate', '--rate-limit',
|
||||
dest='ratelimit', metavar='RATE',
|
||||
@@ -676,7 +679,7 @@ def _dict_from_multiple_values_options_callback(
|
||||
workarounds.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure',
|
||||
action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
help='Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)')
|
||||
workarounds.add_option(
|
||||
'--user-agent',
|
||||
metavar='UA', dest='user_agent',
|
||||
@@ -704,17 +707,13 @@ def _dict_from_multiple_values_options_callback(
|
||||
'--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
|
||||
dest='sleep_interval', type=float,
|
||||
help=(
|
||||
'Number of seconds to sleep before each download when used alone '
|
||||
'or a lower bound of a range for randomized sleep before each download '
|
||||
'(minimum possible number of seconds to sleep) when used along with '
|
||||
'--max-sleep-interval'))
|
||||
'Number of seconds to sleep before each download. '
|
||||
'This is the minimum time to sleep when used along with --max-sleep-interval '
|
||||
'(Alias: --min-sleep-interval)'))
|
||||
workarounds.add_option(
|
||||
'--max-sleep-interval', metavar='SECONDS',
|
||||
dest='max_sleep_interval', type=float,
|
||||
help=(
|
||||
'Upper bound of a range for randomized sleep before each download '
|
||||
'(maximum possible number of seconds to sleep). Must only be used '
|
||||
'along with --min-sleep-interval'))
|
||||
help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
|
||||
workarounds.add_option(
|
||||
'--sleep-subtitles', metavar='SECONDS',
|
||||
dest='sleep_interval_subtitles', default=0, type=int,
|
||||
@@ -736,7 +735,7 @@ def _dict_from_multiple_values_options_callback(
|
||||
verbosity.add_option(
|
||||
'--skip-download', '--no-download',
|
||||
action='store_true', dest='skip_download', default=False,
|
||||
help='Do not download the video')
|
||||
help='Do not download the video but write all related files (Alias: --no-download)')
|
||||
verbosity.add_option(
|
||||
'-g', '--get-url',
|
||||
action='store_true', dest='geturl', default=False,
|
||||
@@ -979,9 +978,17 @@ def _dict_from_multiple_values_options_callback(
|
||||
filesystem.add_option(
|
||||
'--no-write-playlist-metafiles',
|
||||
action='store_false', dest='allow_playlist_files',
|
||||
help='Do not write playlist metadata when using --write-info-json, --write-description etc.')
|
||||
filesystem.add_option(
|
||||
'--clean-infojson',
|
||||
action='store_true', dest='clean_infojson', default=True,
|
||||
help=(
|
||||
'Do not write playlist metadata when using '
|
||||
'--write-info-json, --write-description etc.'))
|
||||
'Remove some private fields such as filenames from the infojson. '
|
||||
'Note that it could still contain some personal information (default)'))
|
||||
filesystem.add_option(
|
||||
'--no-clean-infojson',
|
||||
action='store_false', dest='clean_infojson',
|
||||
help='Write all fields to the infojson')
|
||||
filesystem.add_option(
|
||||
'--get-comments',
|
||||
action='store_true', dest='getcomments', default=False,
|
||||
@@ -1083,12 +1090,12 @@ def _dict_from_multiple_values_options_callback(
|
||||
'Specify the postprocessor/executable name and the arguments separated by a colon ":" '
|
||||
'to give the argument to the specified postprocessor/executable. Supported postprocessors are: '
|
||||
'SponSkrub, ExtractAudio, VideoRemuxer, VideoConvertor, EmbedSubtitle, Metadata, Merger, '
|
||||
'FixupStretched, FixupM4a, FixupM3u8, SubtitlesConvertor and EmbedThumbnail. '
|
||||
'FixupStretched, FixupM4a, FixupM3u8, SubtitlesConvertor, EmbedThumbnail and SplitChapters. '
|
||||
'The supported executables are: SponSkrub, FFmpeg, FFprobe, and AtomicParsley. '
|
||||
'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
|
||||
'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
|
||||
'a number can be appended to the exe name seperated by "_i" to pass the argument '
|
||||
'before the specified input file. Eg: --ppa "Merger+ffmpeg_i1:-v quiet". '
|
||||
'"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
|
||||
'before the specified input/output file. Eg: --ppa "Merger+ffmpeg_i1:-v quiet". '
|
||||
'You can use this option multiple times to give different arguments to different '
|
||||
'postprocessors. (Alias: --ppa)'))
|
||||
postproc.add_option(
|
||||
@@ -1140,13 +1147,18 @@ def _dict_from_multiple_values_options_callback(
|
||||
metavar='FIELD:FORMAT', dest='metafromfield', action='append',
|
||||
help=(
|
||||
'Parse additional metadata like title/artist from other fields. '
|
||||
'Give field name to extract data from, and format of the field seperated by a ":". '
|
||||
'Give a template or field name to extract data from and the '
|
||||
'format to interpret it as, seperated by a ":". '
|
||||
'Either regular expression with named capture groups or a '
|
||||
'similar syntax to the output template can also be used. '
|
||||
'The parsed parameters replace any existing values and can be use in output template. '
|
||||
'similar syntax to the output template can be used for the FORMAT. '
|
||||
'Similarly, the syntax for output template can be used for FIELD '
|
||||
'to parse the data from multiple fields. '
|
||||
'The parsed parameters replace any existing values and can be used in output templates. '
|
||||
'This option can be used multiple times. '
|
||||
'Example: --parse-metadata "title:%(artist)s - %(title)s" matches a title like '
|
||||
'"Coldplay - Paradise". '
|
||||
'Example: --parse-metadata "%(series)s %(episode_number)s:%(title)s" '
|
||||
'sets the title using series and episode number. '
|
||||
'Example (regex): --parse-metadata "description:Artist - (?P<artist>.+?)"'))
|
||||
postproc.add_option(
|
||||
'--xattrs',
|
||||
@@ -1154,7 +1166,7 @@ def _dict_from_multiple_values_options_callback(
|
||||
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
|
||||
postproc.add_option(
|
||||
'--fixup',
|
||||
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
||||
metavar='POLICY', dest='fixup', default=None,
|
||||
help=(
|
||||
'Automatically correct known faults of the file. '
|
||||
'One of never (do nothing), warn (only emit a warning), '
|
||||
@@ -1176,9 +1188,20 @@ def _dict_from_multiple_values_options_callback(
|
||||
metavar='CMD', dest='exec_cmd',
|
||||
help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
|
||||
postproc.add_option(
|
||||
'--convert-subs', '--convert-subtitles',
|
||||
'--convert-subs', '--convert-sub', '--convert-subtitles',
|
||||
metavar='FORMAT', dest='convertsubtitles', default=None,
|
||||
help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
|
||||
help='Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc) (Alias: --convert-subtitles)')
|
||||
postproc.add_option(
|
||||
'--split-chapters', '--split-tracks',
|
||||
dest='split_chapters', action='store_true', default=False,
|
||||
help=(
|
||||
'Split video into multiple files based on internal chapters. '
|
||||
'The "chapter:" prefix can be used with "--paths" and "--output" to '
|
||||
'set the output filename for the split files. See "OUTPUT TEMPLATE" for details'))
|
||||
postproc.add_option(
|
||||
'--no-split-chapters', '--no-split-tracks',
|
||||
dest='split_chapters', action='store_false',
|
||||
help='Do not split video based on chapters (default)')
|
||||
|
||||
sponskrub = optparse.OptionGroup(parser, 'SponSkrub (SponsorBlock) Options', description=(
|
||||
'SponSkrub (https://github.com/yt-dlp/SponSkrub) is a utility to mark/remove sponsor segments '
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
FFmpegVideoConvertorPP,
|
||||
FFmpegVideoRemuxerPP,
|
||||
FFmpegSubtitlesConvertorPP,
|
||||
FFmpegSplitChaptersPP,
|
||||
)
|
||||
from .xattrpp import XAttrMetadataPP
|
||||
from .execafterdownload import ExecAfterDownloadPP
|
||||
@@ -31,6 +32,7 @@ def get_postprocessor(key):
|
||||
'ExecAfterDownloadPP',
|
||||
'FFmpegEmbedSubtitlePP',
|
||||
'FFmpegExtractAudioPP',
|
||||
'FFmpegSplitChaptersPP',
|
||||
'FFmpegFixupM3u8PP',
|
||||
'FFmpegFixupM4aPP',
|
||||
'FFmpegFixupStretchedPP',
|
||||
|
||||
@@ -91,10 +91,18 @@ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
|
||||
except Exception:
|
||||
self.report_warning(errnote)
|
||||
|
||||
def _configuration_args(self, *args, **kwargs):
|
||||
def _configuration_args(self, exe, keys=None, default=[], use_compat=True):
|
||||
pp_key = self.pp_key().lower()
|
||||
exe = exe.lower()
|
||||
root_key = exe if pp_key == exe else '%s+%s' % (pp_key, exe)
|
||||
keys = ['%s%s' % (root_key, k) for k in (keys or [''])]
|
||||
if root_key in keys:
|
||||
keys += [root_key] + ([] if pp_key == exe else [(self.pp_key(), exe)]) + ['default']
|
||||
else:
|
||||
use_compat = False
|
||||
return cli_configuration_args(
|
||||
self._downloader.params.get('postprocessor_args'),
|
||||
self.pp_key().lower(), *args, **kwargs)
|
||||
keys, default, use_compat)
|
||||
|
||||
|
||||
class AudioConversionError(PostProcessingError):
|
||||
|
||||
@@ -47,7 +47,7 @@ def run(self, info):
|
||||
self.to_screen('There aren\'t any thumbnails to embed')
|
||||
return [], info
|
||||
|
||||
original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filename']
|
||||
initial_thumbnail = original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filepath']
|
||||
|
||||
if not os.path.exists(encodeFilename(thumbnail_filename)):
|
||||
self.report_warning('Skipping embedding the thumbnail because the file is missing.')
|
||||
@@ -65,6 +65,8 @@ def is_webp(path):
|
||||
if thumbnail_ext != 'webp' and is_webp(thumbnail_filename):
|
||||
self.to_screen('Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename)
|
||||
thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp')
|
||||
if os.path.exists(thumbnail_webp_filename):
|
||||
os.remove(thumbnail_webp_filename)
|
||||
os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename))
|
||||
original_thumbnail = thumbnail_filename = thumbnail_webp_filename
|
||||
thumbnail_ext = 'webp'
|
||||
@@ -85,6 +87,8 @@ def is_webp(path):
|
||||
thumbnail_filename = thumbnail_jpg_filename
|
||||
thumbnail_ext = 'jpg'
|
||||
|
||||
mtime = os.stat(encodeFilename(filename)).st_mtime
|
||||
|
||||
success = True
|
||||
if info['ext'] == 'mp3':
|
||||
options = [
|
||||
@@ -131,7 +135,7 @@ def is_webp(path):
|
||||
x for x in ['AtomicParsley', 'atomicparsley']
|
||||
if check_executable(x, ['-v'])), None)
|
||||
if atomicparsley is None:
|
||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install')
|
||||
|
||||
cmd = [encodeFilename(atomicparsley, True),
|
||||
encodeFilename(filename, True),
|
||||
@@ -139,7 +143,7 @@ def is_webp(path):
|
||||
encodeFilename(thumbnail_filename, True),
|
||||
encodeArgument('-o'),
|
||||
encodeFilename(temp_filename, True)]
|
||||
cmd += [encodeArgument(o) for o in self._configuration_args(exe='AtomicParsley')]
|
||||
cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]
|
||||
|
||||
self.to_screen('Adding thumbnail to "%s"' % filename)
|
||||
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
|
||||
@@ -187,10 +191,13 @@ def is_webp(path):
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
self.try_utime(filename, mtime, mtime)
|
||||
|
||||
files_to_delete = [thumbnail_filename]
|
||||
if self._already_have_thumbnail:
|
||||
info['__files_to_move'][original_thumbnail] = replace_extension(
|
||||
info['__thumbnail_filename'], os.path.splitext(original_thumbnail)[1][1:])
|
||||
info['__files_to_move'][initial_thumbnail],
|
||||
os.path.splitext(original_thumbnail)[1][1:])
|
||||
if original_thumbnail == thumbnail_filename:
|
||||
files_to_delete = []
|
||||
elif original_thumbnail != thumbnail_filename:
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
from .common import AudioConversionError, PostProcessor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
@@ -18,7 +19,6 @@
|
||||
PostProcessingError,
|
||||
prepend_extension,
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
ISO639Utils,
|
||||
process_communicate_or_kill,
|
||||
@@ -61,7 +61,7 @@ def __init__(self, downloader=None):
|
||||
|
||||
def check_version(self):
|
||||
if not self.available:
|
||||
raise FFmpegPostProcessorError('ffmpeg not found. Please install')
|
||||
raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
|
||||
|
||||
required_version = '10-0' if self.basename == 'avconv' else '1.0'
|
||||
if is_outdated_version(
|
||||
@@ -165,7 +165,7 @@ def probe_executable(self):
|
||||
|
||||
def get_audio_codec(self, path):
|
||||
if not self.probe_available and not self.available:
|
||||
raise PostProcessingError('ffprobe and ffmpeg not found. Please install')
|
||||
raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
|
||||
try:
|
||||
if self.probe_available:
|
||||
cmd = [
|
||||
@@ -207,7 +207,7 @@ def get_metadata_object(self, path, opts=[]):
|
||||
if self.probe_basename != 'ffprobe':
|
||||
if self.probe_available:
|
||||
self.report_warning('Only ffprobe is supported for metadata extraction')
|
||||
raise PostProcessingError('ffprobe not found. Please install.')
|
||||
raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
|
||||
self.check_version()
|
||||
|
||||
cmd = [
|
||||
@@ -234,25 +234,35 @@ def get_stream_number(self, path, keys, value):
|
||||
return num, len(streams)
|
||||
|
||||
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
|
||||
return self.real_run_ffmpeg(
|
||||
[(path, []) for path in input_paths],
|
||||
[(out_path, opts)])
|
||||
|
||||
def real_run_ffmpeg(self, input_path_opts, output_path_opts):
|
||||
self.check_version()
|
||||
|
||||
oldest_mtime = min(
|
||||
os.stat(encodeFilename(path)).st_mtime for path in input_paths)
|
||||
os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts)
|
||||
|
||||
cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
|
||||
# avconv does not have repeat option
|
||||
if self.basename == 'ffmpeg':
|
||||
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
|
||||
|
||||
def make_args(file, pre=[], post=[], *args, **kwargs):
|
||||
args = pre + self._configuration_args(*args, **kwargs) + post
|
||||
def make_args(file, args, name, number):
|
||||
keys = ['_%s%d' % (name, number), '_%s' % name]
|
||||
if name == 'o' and number == 1:
|
||||
keys.append('')
|
||||
args += self._configuration_args(self.basename, keys)
|
||||
if name == 'i':
|
||||
args.append('-i')
|
||||
return (
|
||||
[encodeArgument(o) for o in args]
|
||||
[encodeArgument(arg) for arg in args]
|
||||
+ [encodeFilename(self._ffmpeg_filename_argument(file), True)])
|
||||
|
||||
for i, path in enumerate(input_paths):
|
||||
cmd += make_args(path, post=['-i'], exe='%s_i%d' % (self.basename, i + 1), use_default_arg=False)
|
||||
cmd += make_args(out_path, pre=opts, exe=self.basename)
|
||||
for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
|
||||
cmd += [arg for i, o in enumerate(path_opts)
|
||||
for arg in make_args(o[0], o[1], arg_type, i + 1)]
|
||||
|
||||
self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
@@ -262,7 +272,8 @@ def make_args(file, pre=[], post=[], *args, **kwargs):
|
||||
if self.get_param('verbose', False):
|
||||
self.report_error(stderr)
|
||||
raise FFmpegPostProcessorError(stderr.split('\n')[-1])
|
||||
self.try_utime(out_path, oldest_mtime, oldest_mtime)
|
||||
for out_path, _ in output_path_opts:
|
||||
self.try_utime(out_path, oldest_mtime, oldest_mtime)
|
||||
return stderr.decode('utf-8', 'replace')
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
@@ -474,7 +485,7 @@ def run(self, information):
|
||||
self.report_warning('JSON subtitles cannot be embedded')
|
||||
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
|
||||
sub_langs.append(lang)
|
||||
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
|
||||
sub_filenames.append(sub_info['filepath'])
|
||||
else:
|
||||
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
|
||||
webm_vtt_warn = True
|
||||
@@ -539,8 +550,8 @@ def add(meta_list, info_list=None):
|
||||
|
||||
add('title', ('track', 'title'))
|
||||
add('date', 'upload_date')
|
||||
add(('description', 'comment'), 'description')
|
||||
add('purl', 'webpage_url')
|
||||
add(('description', 'synopsis'), 'description')
|
||||
add(('purl', 'comment'), 'webpage_url')
|
||||
add('track', 'track_number')
|
||||
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
|
||||
add('genre')
|
||||
@@ -700,7 +711,6 @@ def __init__(self, downloader=None, format=None):
|
||||
|
||||
def run(self, info):
|
||||
subs = info.get('requested_subtitles')
|
||||
filename = info['filepath']
|
||||
new_ext = self.format
|
||||
new_format = new_ext
|
||||
if new_format == 'vtt':
|
||||
@@ -720,9 +730,9 @@ def run(self, info):
|
||||
'You have requested to convert json subtitles into another format, '
|
||||
'which is currently not possible')
|
||||
continue
|
||||
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
|
||||
old_file = sub['filepath']
|
||||
sub_filenames.append(old_file)
|
||||
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
|
||||
new_file = replace_extension(old_file, new_ext)
|
||||
|
||||
if ext in ('dfxp', 'ttml', 'tt'):
|
||||
self.report_warning(
|
||||
@@ -730,7 +740,7 @@ def run(self, info):
|
||||
'which results in style information loss')
|
||||
|
||||
dfxp_file = old_file
|
||||
srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
|
||||
srt_file = replace_extension(old_file, 'srt')
|
||||
|
||||
with open(dfxp_file, 'rb') as f:
|
||||
srt_data = dfxp2srt(f.read())
|
||||
@@ -741,7 +751,8 @@ def run(self, info):
|
||||
|
||||
subs[lang] = {
|
||||
'ext': 'srt',
|
||||
'data': srt_data
|
||||
'data': srt_data,
|
||||
'filepath': srt_file,
|
||||
}
|
||||
|
||||
if new_ext == 'srt':
|
||||
@@ -755,6 +766,47 @@ def run(self, info):
|
||||
subs[lang] = {
|
||||
'ext': new_ext,
|
||||
'data': f.read(),
|
||||
'filepath': new_file,
|
||||
}
|
||||
|
||||
info['__files_to_move'][new_file] = replace_extension(
|
||||
info['__files_to_move'][old_file], new_ext)
|
||||
|
||||
return sub_filenames, info
|
||||
|
||||
|
||||
class FFmpegSplitChaptersPP(FFmpegPostProcessor):
|
||||
|
||||
def _prepare_filename(self, number, chapter, info):
|
||||
info = info.copy()
|
||||
info.update({
|
||||
'section_number': number,
|
||||
'section_title': chapter.get('title'),
|
||||
'section_start': chapter.get('start_time'),
|
||||
'section_end': chapter.get('end_time'),
|
||||
})
|
||||
return self._downloader.prepare_filename(info, 'chapter')
|
||||
|
||||
def _ffmpeg_args_for_chapter(self, number, chapter, info):
|
||||
destination = self._prepare_filename(number, chapter, info)
|
||||
if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
|
||||
return
|
||||
|
||||
chapter['filepath'] = destination
|
||||
self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
|
||||
return (
|
||||
destination,
|
||||
['-ss', compat_str(chapter['start_time']),
|
||||
'-t', compat_str(chapter['end_time'] - chapter['start_time'])])
|
||||
|
||||
def run(self, info):
|
||||
chapters = info.get('chapters') or []
|
||||
if not chapters:
|
||||
self.report_warning('Chapter information is unavailable')
|
||||
return [], info
|
||||
|
||||
self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
|
||||
for idx, chapter in enumerate(chapters):
|
||||
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
|
||||
self.real_run_ffmpeg([(info['filepath'], opts)], [(destination, ['-c', 'copy'])])
|
||||
return [], info
|
||||
|
||||
@@ -4,11 +4,10 @@
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import compat_str
|
||||
from ..utils import str_or_none
|
||||
|
||||
|
||||
class MetadataFromFieldPP(PostProcessor):
|
||||
regex = r'(?P<field>\w+):(?P<format>.+)$'
|
||||
regex = r'(?P<in>.+):(?P<out>.+)$'
|
||||
|
||||
def __init__(self, downloader, formats):
|
||||
PostProcessor.__init__(self, downloader)
|
||||
@@ -19,11 +18,20 @@ def __init__(self, downloader, formats):
|
||||
match = re.match(self.regex, f)
|
||||
assert match is not None
|
||||
self._data.append({
|
||||
'field': match.group('field'),
|
||||
'format': match.group('format'),
|
||||
'regex': self.format_to_regex(match.group('format'))})
|
||||
'in': match.group('in'),
|
||||
'out': match.group('out'),
|
||||
'tmpl': self.field_to_template(match.group('in')),
|
||||
'regex': self.format_to_regex(match.group('out')),
|
||||
})
|
||||
|
||||
def format_to_regex(self, fmt):
|
||||
@staticmethod
|
||||
def field_to_template(tmpl):
|
||||
if re.match(r'\w+$', tmpl):
|
||||
return '%%(%s)s' % tmpl
|
||||
return tmpl
|
||||
|
||||
@staticmethod
|
||||
def format_to_regex(fmt):
|
||||
r"""
|
||||
Converts a string like
|
||||
'%(title)s - %(artist)s'
|
||||
@@ -37,7 +45,7 @@ def format_to_regex(self, fmt):
|
||||
# replace %(..)s with regex group and escape other string parts
|
||||
for match in re.finditer(r'%\((\w+)\)s', fmt):
|
||||
regex += re.escape(fmt[lastpos:match.start()])
|
||||
regex += r'(?P<' + match.group(1) + r'>[^\r\n]+)'
|
||||
regex += r'(?P<%s>[^\r\n]+)' % match.group(1)
|
||||
lastpos = match.end()
|
||||
if lastpos < len(fmt):
|
||||
regex += re.escape(fmt[lastpos:])
|
||||
@@ -45,22 +53,16 @@ def format_to_regex(self, fmt):
|
||||
|
||||
def run(self, info):
|
||||
for dictn in self._data:
|
||||
field, regex = dictn['field'], dictn['regex']
|
||||
if field not in info:
|
||||
self.report_warning('Video doesnot have a %s' % field)
|
||||
continue
|
||||
data_to_parse = str_or_none(info[field])
|
||||
if data_to_parse is None:
|
||||
self.report_warning('Field %s cannot be parsed' % field)
|
||||
continue
|
||||
self.write_debug('Searching for r"%s" in %s' % (regex, field))
|
||||
match = re.search(regex, data_to_parse)
|
||||
tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
|
||||
data_to_parse = tmpl % info_copy
|
||||
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
|
||||
match = re.search(dictn['regex'], data_to_parse)
|
||||
if match is None:
|
||||
self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
|
||||
self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
|
||||
continue
|
||||
for attribute, value in match.groupdict().items():
|
||||
info[attribute] = value
|
||||
self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
|
||||
self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
|
||||
return [], info
|
||||
|
||||
|
||||
|
||||
@@ -13,10 +13,6 @@
|
||||
|
||||
class MoveFilesAfterDownloadPP(PostProcessor):
|
||||
|
||||
def __init__(self, downloader, files_to_move):
|
||||
PostProcessor.__init__(self, downloader)
|
||||
self.files_to_move = files_to_move
|
||||
|
||||
@classmethod
|
||||
def pp_key(cls):
|
||||
return 'MoveFiles'
|
||||
@@ -25,11 +21,10 @@ def run(self, info):
|
||||
dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
|
||||
finaldir = info.get('__finaldir', dl_path)
|
||||
finalpath = os.path.join(finaldir, dl_name)
|
||||
self.files_to_move.update(info['__files_to_move'])
|
||||
self.files_to_move[info['filepath']] = decodeFilename(finalpath)
|
||||
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
|
||||
|
||||
make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
|
||||
for oldfile, newfile in self.files_to_move.items():
|
||||
for oldfile, newfile in info['__files_to_move'].items():
|
||||
if not newfile:
|
||||
newfile = make_newfilename(oldfile)
|
||||
if os.path.abspath(encodeFilename(oldfile)) == os.path.abspath(encodeFilename(newfile)):
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
from ..compat import compat_shlex_split
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
cli_option,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
shell_quote,
|
||||
@@ -31,7 +32,7 @@ def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False,
|
||||
if path:
|
||||
raise PostProcessingError('sponskrub not found in "%s"' % path)
|
||||
else:
|
||||
raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-path.')
|
||||
raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-path')
|
||||
|
||||
def get_exe(self, path=''):
|
||||
if not path or not check_executable(path, ['-h']):
|
||||
@@ -70,8 +71,9 @@ def run(self, information):
|
||||
cmd = [self.path]
|
||||
if not self.cutout:
|
||||
cmd += ['-chapter']
|
||||
cmd += cli_option(self._downloader.params, '-proxy', 'proxy')
|
||||
cmd += compat_shlex_split(self.args) # For backward compatibility
|
||||
cmd += self._configuration_args(exe=self._exe_name, use_default_arg='no_compat')
|
||||
cmd += self._configuration_args(self._exe_name, use_compat=False)
|
||||
cmd += ['--', information['id'], filename, temp_filename]
|
||||
cmd = [encodeArgument(i) for i in cmd]
|
||||
|
||||
|
||||
@@ -49,12 +49,16 @@ def calc_sha256sum(path):
|
||||
h.update(mv[:n])
|
||||
return h.hexdigest()
|
||||
|
||||
to_screen('Current Build Hash %s' % calc_sha256sum(sys.executable))
|
||||
|
||||
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
|
||||
to_screen('It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Please use that to update.')
|
||||
return
|
||||
|
||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||
# though symlinks are not followed so that we need to do this manually
|
||||
# with help of realpath
|
||||
filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
|
||||
to_screen('Current Build Hash %s' % calc_sha256sum(filename))
|
||||
|
||||
# Download and check versions info
|
||||
try:
|
||||
version_info = opener.open(JSON_URL).read().decode('utf-8')
|
||||
@@ -103,11 +107,6 @@ def get_sha256sum(bin_or_exe, version):
|
||||
(i[1] for i in hashes if i[0] == 'yt-dlp%s' % label),
|
||||
None)
|
||||
|
||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||
# though symlinks are not followed so that we need to do this manually
|
||||
# with help of realpath
|
||||
filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % filename)
|
||||
return
|
||||
@@ -198,28 +197,18 @@ def get_sha256sum(bin_or_exe, version):
|
||||
to_screen('Visit https://github.com/yt-dlp/yt-dlp/releases/latest')
|
||||
return
|
||||
|
||||
expected_sum = get_sha256sum('zip', py_ver)
|
||||
if expected_sum and hashlib.sha256(newcontent).hexdigest() != expected_sum:
|
||||
to_screen('ERROR: unable to verify the new zip')
|
||||
to_screen('Visit https://github.com/yt-dlp/yt-dlp/releases/latest')
|
||||
return
|
||||
|
||||
try:
|
||||
with open(filename + '.new', 'wb') as outf:
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(newcontent)
|
||||
except (IOError, OSError):
|
||||
if verbose:
|
||||
to_screen(encode_compat_str(traceback.format_exc()))
|
||||
to_screen('ERROR: unable to write the new version')
|
||||
return
|
||||
|
||||
expected_sum = get_sha256sum('zip', py_ver)
|
||||
if expected_sum and calc_sha256sum(filename + '.new') != expected_sum:
|
||||
to_screen('ERROR: unable to verify the new zip')
|
||||
to_screen('Visit https://github.com/yt-dlp/yt-dlp/releases/latest')
|
||||
try:
|
||||
os.remove(filename + '.new')
|
||||
except OSError:
|
||||
to_screen('ERROR: unable to remove corrupt zip')
|
||||
return
|
||||
|
||||
try:
|
||||
os.rename(filename + '.new', filename)
|
||||
except OSError:
|
||||
to_screen('ERROR: unable to overwrite current version')
|
||||
return
|
||||
|
||||
|
||||
@@ -1836,7 +1836,7 @@ def write_json_file(obj, fn):
|
||||
|
||||
try:
|
||||
with tf:
|
||||
json.dump(obj, tf)
|
||||
json.dump(obj, tf, default=repr)
|
||||
if sys.platform == 'win32':
|
||||
# Need to remove existing file on Windows, else os.rename raises
|
||||
# WindowsError or FileExistsError.
|
||||
@@ -2423,6 +2423,15 @@ def __init__(self, msg, exc_info=None):
|
||||
self.exc_info = exc_info
|
||||
|
||||
|
||||
class EntryNotInPlaylist(YoutubeDLError):
|
||||
"""Entry not in playlist exception.
|
||||
|
||||
This exception will be thrown by YoutubeDL when a requested entry
|
||||
is not found in the playlist info_dict
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class SameFileError(YoutubeDLError):
|
||||
"""Same File exception.
|
||||
|
||||
@@ -4106,6 +4115,7 @@ def parse_age_limit(s):
|
||||
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
|
||||
if m:
|
||||
return int(m.group('age'))
|
||||
s = s.upper()
|
||||
if s in US_RATINGS:
|
||||
return US_RATINGS[s]
|
||||
m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
|
||||
@@ -4182,8 +4192,10 @@ def q(qid):
|
||||
|
||||
DEFAULT_OUTTMPL = {
|
||||
'default': '%(title)s [%(id)s].%(ext)s',
|
||||
'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
|
||||
}
|
||||
OUTTMPL_TYPES = {
|
||||
'chapter': None,
|
||||
'subtitle': None,
|
||||
'thumbnail': None,
|
||||
'description': 'description',
|
||||
@@ -4193,6 +4205,20 @@ def q(qid):
|
||||
'pl_infojson': 'info.json',
|
||||
}
|
||||
|
||||
# As of [1] format syntax is:
|
||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
||||
FORMAT_RE = r'''(?x)
|
||||
(?<!%)
|
||||
%
|
||||
\({0}\) # mapping key
|
||||
(?:[#0\-+ ]+)? # conversion flags (optional)
|
||||
(?:\d+)? # minimum field width (optional)
|
||||
(?:\.\d+)? # precision (optional)
|
||||
[hlL]? # length modifier (optional)
|
||||
(?P<type>[diouxXeEfFgGcrs%]) # conversion type
|
||||
'''
|
||||
|
||||
|
||||
def limit_length(s, length):
|
||||
""" Add ellipses to overly long strings """
|
||||
@@ -4692,36 +4718,26 @@ def cli_valueless_option(params, command_option, param, expected_value=True):
|
||||
return [command_option] if param == expected_value else []
|
||||
|
||||
|
||||
def cli_configuration_args(argdict, key, default=[], exe=None, use_default_arg=True):
|
||||
# use_default_arg can be True, False, or 'no_compat'
|
||||
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
|
||||
if isinstance(argdict, (list, tuple)): # for backward compatibility
|
||||
if use_default_arg is True:
|
||||
if use_compat:
|
||||
return argdict
|
||||
else:
|
||||
argdict = None
|
||||
|
||||
if argdict is None:
|
||||
return default
|
||||
assert isinstance(argdict, dict)
|
||||
|
||||
key = key.lower()
|
||||
args = exe_args = None
|
||||
if exe is not None:
|
||||
assert isinstance(exe, compat_str)
|
||||
exe = exe.lower()
|
||||
args = argdict.get('%s+%s' % (key, exe))
|
||||
if args is None:
|
||||
exe_args = argdict.get(exe)
|
||||
|
||||
if args is None:
|
||||
args = argdict.get(key) if key != exe else None
|
||||
if args is None and exe_args is None:
|
||||
args = argdict.get('default', default) if use_default_arg else default
|
||||
|
||||
args, exe_args = args or [], exe_args or []
|
||||
assert isinstance(args, (list, tuple))
|
||||
assert isinstance(exe_args, (list, tuple))
|
||||
return args + exe_args
|
||||
assert isinstance(keys, (list, tuple))
|
||||
for key_list in keys:
|
||||
if isinstance(key_list, compat_str):
|
||||
key_list = (key_list,)
|
||||
arg_list = list(filter(
|
||||
lambda x: x is not None,
|
||||
[argdict.get(key.lower()) for key in key_list]))
|
||||
if arg_list:
|
||||
return [arg for args in arg_list for arg in args]
|
||||
return default
|
||||
|
||||
|
||||
class ISO639Utils(object):
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.03.03.1'
|
||||
__version__ = '2021.03.24'
|
||||
|
||||
Reference in New Issue
Block a user