mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-16 22:25:40 +07:00
Compare commits
143 Commits
2022.03.08
...
2022.04.08
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dee1d65dc3 | ||
|
|
7884ade65e | ||
|
|
89fabf1125 | ||
|
|
11e1c2e3f8 | ||
|
|
ebc7d3ff1f | ||
|
|
d8a58ddce7 | ||
|
|
4d57133095 | ||
|
|
9b8b7a7b5e | ||
|
|
ab0970b233 | ||
|
|
b52e788eb2 | ||
|
|
316f2650f8 | ||
|
|
bd4073c535 | ||
|
|
22fba53fbd | ||
|
|
61d3665d9d | ||
|
|
870efdee28 | ||
|
|
b506289fe2 | ||
|
|
b63837bce0 | ||
|
|
fcfa8853e4 | ||
|
|
06b1628d3e | ||
|
|
da1ffde15d | ||
|
|
42a4f21a03 | ||
|
|
8973767198 | ||
|
|
0edb3e336c | ||
|
|
ce0593ef61 | ||
|
|
a44ca5a470 | ||
|
|
0a8a7e68fa | ||
|
|
f4d706a931 | ||
|
|
5fa3c9a88f | ||
|
|
04f3fd2c89 | ||
|
|
85e801a9db | ||
|
|
5127e92a94 | ||
|
|
18eac302a2 | ||
|
|
12e022d074 | ||
|
|
265e586d96 | ||
|
|
fbfde1c3e6 | ||
|
|
dc57e74a7f | ||
|
|
a17526e427 | ||
|
|
ad210f4fd4 | ||
|
|
c8e856a551 | ||
|
|
c085e4ec47 | ||
|
|
4c268f9cb7 | ||
|
|
5d45484cc7 | ||
|
|
e6f868a63c | ||
|
|
c4f60dd7cd | ||
|
|
f189faf1ce | ||
|
|
504f789ad5 | ||
|
|
bb5a7cb8ad | ||
|
|
c418e6b5a6 | ||
|
|
11078c6d57 | ||
|
|
5d0aeac0e9 | ||
|
|
180c81509f | ||
|
|
ab2579bb45 | ||
|
|
48e15bb6b1 | ||
|
|
af4944d84b | ||
|
|
e7870111e8 | ||
|
|
8a7f68d0b1 | ||
|
|
9139d2fae0 | ||
|
|
bdd60588b0 | ||
|
|
f5f15c9993 | ||
|
|
cb96c5be70 | ||
|
|
90137ca4be | ||
|
|
1c1b2f96ae | ||
|
|
47b8bf207b | ||
|
|
4628a3aa75 | ||
|
|
5b4bb715e6 | ||
|
|
1235d333ab | ||
|
|
18e4940825 | ||
|
|
c0b6e5c74d | ||
|
|
727029c508 | ||
|
|
5c3895fff1 | ||
|
|
fd2ad7cb24 | ||
|
|
4a3175fc4c | ||
|
|
5cf34021f5 | ||
|
|
34baa9fdf0 | ||
|
|
6db9c4d57d | ||
|
|
3cea3edd1a | ||
|
|
b1a7cd056a | ||
|
|
28787f16c6 | ||
|
|
1fb707badb | ||
|
|
a3f2445e29 | ||
|
|
ae72962643 | ||
|
|
ae6a1b9585 | ||
|
|
231025c463 | ||
|
|
700ccbe3f1 | ||
|
|
12a64f2777 | ||
|
|
b8f2f8f6b3 | ||
|
|
af14914baa | ||
|
|
ea5ca8e7fc | ||
|
|
c2d2ee40eb | ||
|
|
c70c418d33 | ||
|
|
b9c7b1e9b4 | ||
|
|
d5820461e8 | ||
|
|
8a23db9519 | ||
|
|
1f1df1251e | ||
|
|
84842aee2b | ||
|
|
be4685ab7b | ||
|
|
e6552207da | ||
|
|
a2e77303e3 | ||
|
|
510809f1aa | ||
|
|
f4ad919298 | ||
|
|
eeb2a770f3 | ||
|
|
0c14d66ad9 | ||
|
|
52efa4b312 | ||
|
|
028f6437f1 | ||
|
|
43c38abd1f | ||
|
|
e4b98809cf | ||
|
|
16c620bc55 | ||
|
|
5a373d9768 | ||
|
|
7e6a187096 | ||
|
|
3f168f0e45 | ||
|
|
7bdcb4a40e | ||
|
|
497a6c5f57 | ||
|
|
4b3c5d1b81 | ||
|
|
ec47c12f69 | ||
|
|
25791435b7 | ||
|
|
4e34889f1c | ||
|
|
a1b2d84360 | ||
|
|
5dbc77df26 | ||
|
|
d71fd41249 | ||
|
|
d69e55c1d8 | ||
|
|
9f2a6352ea | ||
|
|
aeb21b98f1 | ||
|
|
b3edc8068e | ||
|
|
17322130a9 | ||
|
|
5ca764c506 | ||
|
|
e880c92c65 | ||
|
|
a825ffbffa | ||
|
|
592b748582 | ||
|
|
cf4f42cb97 | ||
|
|
da1d734fbe | ||
|
|
2b38f7b2bc | ||
|
|
76aa991374 | ||
|
|
24e3d87431 | ||
|
|
63b2f88bc7 | ||
|
|
07ff290dce | ||
|
|
51c22ef4e2 | ||
|
|
33b8c411bc | ||
|
|
10331a2672 | ||
|
|
6e6beffd04 | ||
|
|
e491d06d34 | ||
|
|
7a0ba75857 | ||
|
|
e248be3319 | ||
|
|
ff91cf7483 |
8
.editorconfig
Normal file
8
.editorconfig
Normal file
@@ -0,0 +1,8 @@
|
||||
root = true
|
||||
|
||||
[**.py]
|
||||
charset = utf-8
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -2,3 +2,5 @@
|
||||
|
||||
Makefile* text whitespace=-tab-in-indent
|
||||
*.sh text eol=lf
|
||||
*.md diff=markdown
|
||||
*.py diff=python
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a broken site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -51,12 +51,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.03.08.1 (exe)
|
||||
[debug] yt-dlp version 2022.04.08 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.03.08.1)
|
||||
yt-dlp is up to date (2022.04.08)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a new site support request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -62,12 +62,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.03.08.1 (exe)
|
||||
[debug] yt-dlp version 2022.04.08 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.03.08.1)
|
||||
yt-dlp is up to date (2022.04.08)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a site feature request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -60,12 +60,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.03.08.1 (exe)
|
||||
[debug] yt-dlp version 2022.04.08 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.03.08.1)
|
||||
yt-dlp is up to date (2022.04.08)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a bug unrelated to a specific site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -45,12 +45,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.03.08.1 (exe)
|
||||
[debug] yt-dlp version 2022.04.08 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.03.08.1)
|
||||
yt-dlp is up to date (2022.04.08)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
23
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
23
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
@@ -13,7 +13,7 @@ body:
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
|
||||
required: true
|
||||
@@ -30,3 +30,24 @@ body:
|
||||
placeholder: WRITE DESCRIPTION HERE
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: log
|
||||
attributes:
|
||||
label: Verbose log
|
||||
description: |
|
||||
If your feature request involves an existing yt-dlp command, provide the complete verbose output of that command.
|
||||
Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
|
||||
It should look similar to this:
|
||||
placeholder: |
|
||||
[debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2021.12.01 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2021.12.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/6_question.yml
vendored
2
.github/ISSUE_TEMPLATE/6_question.yml
vendored
@@ -35,7 +35,7 @@ body:
|
||||
attributes:
|
||||
label: Verbose log
|
||||
description: |
|
||||
If your question involes a yt-dlp command, provide the complete verbose output of that command.
|
||||
If your question involves a yt-dlp command, provide the complete verbose output of that command.
|
||||
Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
|
||||
It should look similar to this:
|
||||
placeholder: |
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a broken site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a new site support request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a site feature request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
|
||||
2
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
vendored
2
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
vendored
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a bug unrelated to a specific site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
|
||||
@@ -13,7 +13,7 @@ body:
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
|
||||
required: true
|
||||
@@ -30,3 +30,24 @@ body:
|
||||
placeholder: WRITE DESCRIPTION HERE
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: log
|
||||
attributes:
|
||||
label: Verbose log
|
||||
description: |
|
||||
If your feature request involves an existing yt-dlp command, provide the complete verbose output of that command.
|
||||
Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
|
||||
It should look similar to this:
|
||||
placeholder: |
|
||||
[debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2021.12.01 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2021.12.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
||||
2
.github/ISSUE_TEMPLATE_tmpl/6_question.yml
vendored
2
.github/ISSUE_TEMPLATE_tmpl/6_question.yml
vendored
@@ -35,7 +35,7 @@ body:
|
||||
attributes:
|
||||
label: Verbose log
|
||||
description: |
|
||||
If your question involes a yt-dlp command, provide the complete verbose output of that command.
|
||||
If your question involves a yt-dlp command, provide the complete verbose output of that command.
|
||||
Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
|
||||
It should look similar to this:
|
||||
placeholder: |
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -116,3 +116,6 @@ yt-dlp.zip
|
||||
ytdlp_plugins/extractor/*
|
||||
!ytdlp_plugins/extractor/__init__.py
|
||||
!ytdlp_plugins/extractor/sample.py
|
||||
ytdlp_plugins/postprocessor/*
|
||||
!ytdlp_plugins/postprocessor/__init__.py
|
||||
!ytdlp_plugins/postprocessor/sample.py
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
# .readthedocs.yaml
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
# Optionally build your docs in additional formats such as PDF
|
||||
formats:
|
||||
- epub
|
||||
- pdf
|
||||
- htmlzip
|
||||
|
||||
# Optionally set the version of Python and requirements required to build your docs
|
||||
python:
|
||||
version: 3
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
@@ -534,13 +534,13 @@ #### Example
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
TITLE_RE = r'<h1>([^<]+)</h1>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
@@ -643,7 +643,7 @@ ### Use convenience conversion and parsing functions
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get`, `dict_get` and `traverse_obj` for safe metadata extraction from parsed JSON.
|
||||
Use `traverse_obj` and `try_call` (supersedes `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
|
||||
17
CONTRIBUTORS
17
CONTRIBUTORS
@@ -214,3 +214,20 @@ pycabbage
|
||||
regarten
|
||||
Ronnnny
|
||||
schn0sch
|
||||
s0u1h
|
||||
MrRawes
|
||||
cffswb
|
||||
danielyli
|
||||
1-Byte
|
||||
mehq
|
||||
dzek69
|
||||
aaearon
|
||||
panatexxa
|
||||
kmark
|
||||
un-def
|
||||
goggle
|
||||
Soebb
|
||||
Fam0r
|
||||
bohwaz
|
||||
dodrian
|
||||
vvto33
|
||||
|
||||
140
Changelog.md
140
Changelog.md
@@ -11,6 +11,144 @@ # Instuctions for creating release
|
||||
-->
|
||||
|
||||
|
||||
### 2022.04.08
|
||||
|
||||
* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Treat multiple `--match-filters` as OR
|
||||
* File locking improvements:
|
||||
* Do not lock downloading file on Windows
|
||||
* Do not prevent download if locking is unsupported
|
||||
* Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
|
||||
* Fix non-blocking non-exclusive lock
|
||||
* De-prioritize automatic-subtitles when no `--sub-lang` is given
|
||||
* Exit after `--dump-user-agent`
|
||||
* Fallback to video-only format when selecting by extension
|
||||
* Fix `--abort-on-error` for subtitles
|
||||
* Fix `--no-overwrite` for playlist infojson
|
||||
* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
|
||||
* Fix `--sleep-interval`
|
||||
* Fix `--throttled-rate`
|
||||
* Fix `autonumber`
|
||||
* Fix case of `http_headers`
|
||||
* Fix filepath sanitization in `--print-to-file`
|
||||
* Handle float in `--wait-for-video`
|
||||
* Ignore `mhtml` formats from `-f mergeall`
|
||||
* Ignore format-specific fields in initial pass of `--match-filter`
|
||||
* Protect stdout from unexpected progress and console-title
|
||||
* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Remove incorrect warning for `--dateafter`
|
||||
* Show warning when all media formats have DRM
|
||||
* [downloader] Fix invocation of `HttpieFD`
|
||||
* [http] Fix #3215
|
||||
* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
|
||||
* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [extractor] Add `_perform_login` function
|
||||
* [extractor] Allow control characters inside json
|
||||
* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [ffmpeg] Cache version data
|
||||
* [FFmpegConcat] Ensure final directory exists
|
||||
* [FfmpegMetadata] Write id3v1 tags
|
||||
* [FFmpegVideoConvertor] Add more formats to `--remux-video`
|
||||
* [FFmpegVideoConvertor] Ensure all streams are copied
|
||||
* [MetadataParser] Validate outtmpl early
|
||||
* [outtmpl] Fix replacement/default when used with alternate
|
||||
* [outtmpl] Limit changes during sanitization
|
||||
* [phantomjs] Fix bug
|
||||
* [test] Add `test_locked_file`
|
||||
* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
|
||||
* [utils] `traverse_obj`: Allow filtering by value
|
||||
* [utils] Add `filter_dict`, `get_first`, `try_call`
|
||||
* [utils] ExtractorError: Fix for older python versions
|
||||
* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
|
||||
* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
|
||||
* [docs] Remove readthedocs
|
||||
* [build] Add `requirements.txt` to pip distributions
|
||||
* [cleanup, postprocessor] Create `_download_json`
|
||||
* [cleanup, vimeo] Fix tests
|
||||
* [cleanup] Misc fixes and minor cleanup
|
||||
* [cleanup] Use `_html_extract_title`
|
||||
* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [arte] Add `format_note` to m3u8 formats
|
||||
* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
|
||||
* [BanBye] Add extractor by [mehq](https://github.com/mehq)
|
||||
* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
|
||||
* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
|
||||
* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
|
||||
* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
|
||||
* [Jable] Add extractors by [mehq](https://github.com/mehq)
|
||||
* [LastFM] Add extractors by [mehq](https://github.com/mehq)
|
||||
* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
|
||||
* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
|
||||
* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
|
||||
* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi)
|
||||
* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
|
||||
* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
|
||||
* [BRMediathek] Fix VALID_URL
|
||||
* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
|
||||
* [crunchyroll] Fix inheritance
|
||||
* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
|
||||
* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [ellentube] Extract subtitles from manifest
|
||||
* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
|
||||
* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
|
||||
* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0
|
||||
* [niconico] Fix extraction of thumbnails and uploader (#3266)
|
||||
* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
|
||||
* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [ParamountPlus, CBS] Change VALID_URL by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
|
||||
* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [rai] Fix extraction of http formats by [nixxo](https://github.com/nixxo)
|
||||
* [rumble] unescape title
|
||||
* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
|
||||
* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
|
||||
* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
* [TikTokVM] Fix redirect to user URL
|
||||
* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
|
||||
* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [veo] Fix `_VALID_URL`
|
||||
* [Veo] Fix extractor by [i6t](https://github.com/i6t)
|
||||
* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
|
||||
* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
|
||||
* [viu] Fixed extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
|
||||
* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
|
||||
* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
|
||||
* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube] Add extractor-arg to skip auto-translated subs
|
||||
* [youtube] Avoid false positives when detecting damaged formats
|
||||
* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
|
||||
* [youtube] Fix auto-translated automatic captions
|
||||
* [youtube] Fix pagination of `membership` tab
|
||||
* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:tab] Minor improvements
|
||||
* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
|
||||
* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
|
||||
|
||||
|
||||
### 2022.03.08.1
|
||||
|
||||
* [cleanup] Refactor `__init__.py`
|
||||
@@ -34,7 +172,7 @@ ### 2022.03.08
|
||||
* Set `webpage_url_...` from `webpage_url` and not input URL
|
||||
* Tolerate failure to `--write-link` due to unknown URL
|
||||
* [aria2c] Add `--http-accept-gzip=true`
|
||||
* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev)
|
||||
* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
|
||||
* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
|
||||
* [devscripts] Improve `prepare_manpage`
|
||||
* [downloader] Do not use aria2c for non-native `m3u8`
|
||||
|
||||
@@ -29,6 +29,7 @@ ## [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
|
||||
* Added support for downloading YoutubeWebArchive videos
|
||||
* Added support for new websites MainStreaming, PRX, nzherald, etc
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,5 +5,6 @@ include README.md
|
||||
include completions/*/*
|
||||
include supportedsites.md
|
||||
include yt-dlp.1
|
||||
include requirements.txt
|
||||
recursive-include devscripts *
|
||||
recursive-include test *
|
||||
|
||||
127
README.md
127
README.md
@@ -3,15 +3,14 @@
|
||||
|
||||
[](#readme)
|
||||
|
||||
[](#release-files "Release")
|
||||
[](LICENSE "License")
|
||||
[](Collaborators.md#collaborators "Donate")
|
||||
[](https://readthedocs.org/projects/yt-dlp/ "Docs")
|
||||
[](supportedsites.md "Supported Sites")
|
||||
[](#release-files "Release")
|
||||
[](https://pypi.org/project/yt-dlp "PyPi")
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
|
||||
[](https://discord.gg/H5MNcFW63r "Discord")
|
||||
[](Collaborators.md#collaborators "Donate")
|
||||
[](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
|
||||
[](https://discord.gg/H5MNcFW63r "Discord")
|
||||
[](supportedsites.md "Supported Sites")
|
||||
[](LICENSE "License")
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
|
||||
|
||||
@@ -71,13 +70,13 @@
|
||||
|
||||
# NEW FEATURES
|
||||
|
||||
* Based on **youtube-dl 2021.12.17 [commit/5add3f4](https://github.com/ytdl-org/youtube-dl/commit/5add3f4373287e6346ca3551239edab549284db3)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
* Based on **youtube-dl 2021.12.17 [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
|
||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
* **Youtube improvements**:
|
||||
* All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists support downloading multiple pages of content
|
||||
@@ -126,6 +125,7 @@ ### Differences in default behavior
|
||||
|
||||
* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
|
||||
* `avconv` is not supported as an alternative to `ffmpeg`
|
||||
* yt-dlp stores config files in slightly different locations to youtube-dl. See [configuration](#configuration) for a list of correct locations
|
||||
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
|
||||
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
|
||||
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
|
||||
@@ -144,6 +144,8 @@ ### Differences in default behavior
|
||||
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
||||
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
||||
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
|
||||
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
|
||||
* youtube-dl tries to remove some superfluous punctuation from filenames. While this can sometimes be helpful, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
* `--compat-options all`: Use all compat options
|
||||
@@ -202,7 +204,7 @@ ### With [PIP](https://pypi.org/project/pip)
|
||||
|
||||
If you want to be on the cutting edge, you can also install the master branch with:
|
||||
```
|
||||
python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip
|
||||
python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
|
||||
```
|
||||
|
||||
Note that on some systems, you may need to use `py` or `python` instead of `python3`
|
||||
@@ -230,14 +232,14 @@ #### Recommended
|
||||
|
||||
File|Description
|
||||
:---|:---
|
||||
[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent binary. Needs Python (recommended for **UNIX-like systems**)
|
||||
[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent binary. Needs Python (recommended for **Linux/BSD**)
|
||||
[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**)
|
||||
[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable (recommended for **MacOS**)
|
||||
|
||||
#### Alternatives
|
||||
|
||||
File|Description
|
||||
:---|:---
|
||||
[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable
|
||||
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary
|
||||
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.<br/> Does not contain `pycryptodomex`, needs VC++14
|
||||
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
|
||||
@@ -263,21 +265,22 @@ ## DEPENDENCIES
|
||||
While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
|
||||
|
||||
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
|
||||
* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
|
||||
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
|
||||
* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
|
||||
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
|
||||
* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
|
||||
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
|
||||
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
|
||||
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
|
||||
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
|
||||
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
|
||||
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
|
||||
* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
|
||||
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
|
||||
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
|
||||
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
|
||||
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
|
||||
* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
|
||||
* Any external downloader that you want to use with `--downloader`
|
||||
|
||||
To use or redistribute the dependencies, you must agree to their respective licensing terms.
|
||||
|
||||
The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included.
|
||||
The Windows and MacOS standalone release binaries are already built with the python interpreter and all optional python packages (marked with \*) included.
|
||||
|
||||
<!-- TODO: ffmpeg has merged this patch. Remove this note once there is new release -->
|
||||
**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||
@@ -367,8 +370,7 @@ ## General Options:
|
||||
available. Pass the minimum number of
|
||||
seconds (or range) to wait between retries
|
||||
--no-wait-for-video Do not wait for scheduled streams (default)
|
||||
--mark-watched Mark videos watched (even with --simulate).
|
||||
Currently only supported for YouTube
|
||||
--mark-watched Mark videos watched (even with --simulate)
|
||||
--no-mark-watched Do not mark videos watched (default)
|
||||
--no-colors Do not emit color codes in output
|
||||
--compat-options OPTS Options that can help keep compatibility
|
||||
@@ -430,24 +432,24 @@ ## Video Selection:
|
||||
--dateafter DATE Download only videos uploaded on or after
|
||||
this date. The date formats accepted are the
|
||||
same as --date
|
||||
--match-filter FILTER Generic video filter. Any field (see
|
||||
--match-filters FILTER Generic video filter. Any field (see
|
||||
"OUTPUT TEMPLATE") can be compared with a
|
||||
number or a string using the operators
|
||||
defined in "Filtering formats". You can
|
||||
also simply specify a field to match if the
|
||||
field is present and "!field" to check if
|
||||
the field is not present. In addition,
|
||||
Python style regular expression matching
|
||||
can be done using "~=", and multiple
|
||||
filters can be checked with "&". Use a "\"
|
||||
to escape "&" or quotes if needed. Eg:
|
||||
--match-filter "!is_live & like_count>?100
|
||||
& description~='(?i)\bcats \& dogs\b'"
|
||||
matches only videos that are not live, has
|
||||
a like count more than 100 (or the like
|
||||
field is not available), and also has a
|
||||
description that contains the phrase "cats
|
||||
& dogs" (ignoring case)
|
||||
field is present, use "!field" to check if
|
||||
the field is not present, and "&" to check
|
||||
multiple conditions. Use a "\" to escape
|
||||
"&" or quotes if needed. If used multiple
|
||||
times, the filter matches if at least one of
|
||||
the conditions are met. Eg: --match-filter
|
||||
!is_live --match-filter "like_count>?100 &
|
||||
description~='(?i)\bcats \& dogs\b'"
|
||||
matches only videos that are not live OR
|
||||
those that have a like count more than 100
|
||||
(or the like field is not available) and
|
||||
also have a description that contains the
|
||||
phrase "cats & dogs" (ignoring case)
|
||||
--no-match-filter Do not use generic video filter (default)
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist
|
||||
@@ -781,8 +783,8 @@ ## Video Format Options:
|
||||
containers irrespective of quality
|
||||
--no-prefer-free-formats Don't give any special preference to free
|
||||
containers (default)
|
||||
--check-formats Check that the selected formats are
|
||||
actually downloadable
|
||||
--check-formats Make sure formats are selected only from
|
||||
those that are actually downloadable
|
||||
--check-all-formats Check all formats for whether they are
|
||||
actually downloadable
|
||||
--no-check-formats Do not check that the formats are actually
|
||||
@@ -839,15 +841,17 @@ ## Post-Processing Options:
|
||||
(requires ffmpeg and ffprobe)
|
||||
--audio-format FORMAT Specify audio format to convert the audio
|
||||
to when -x is used. Currently supported
|
||||
formats are: best (default) or one of
|
||||
best|aac|flac|mp3|m4a|opus|vorbis|wav|alac
|
||||
--audio-quality QUALITY Specify ffmpeg audio quality, insert a
|
||||
formats are: best (default) or one of aac,
|
||||
flac, mp3, m4a, opus, vorbis, wav, alac
|
||||
--audio-quality QUALITY Specify ffmpeg audio quality to use when
|
||||
converting the audio with -x. Insert a
|
||||
value between 0 (best) and 10 (worst) for
|
||||
VBR or a specific bitrate like 128K
|
||||
(default 5)
|
||||
--remux-video FORMAT Remux the video into another container if
|
||||
necessary (currently supported: mp4|mkv|flv
|
||||
|webm|mov|avi|mp3|mka|m4a|ogg|opus). If
|
||||
necessary (currently supported: mp4, mkv,
|
||||
flv, webm, mov, avi, mka, ogg, aac, flac,
|
||||
mp3, m4a, opus, vorbis, wav, alac). If
|
||||
target container does not support the
|
||||
video/audio codec, remuxing will fail. You
|
||||
can specify multiple rules; Eg.
|
||||
@@ -947,10 +951,10 @@ ## Post-Processing Options:
|
||||
option can be used multiple times
|
||||
--no-exec Remove any previously defined --exec
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|vtt|ass|lrc)
|
||||
(currently supported: srt, vtt, ass, lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
--convert-thumbnails FORMAT Convert the thumbnails to another format
|
||||
(currently supported: jpg|png|webp)
|
||||
(currently supported: jpg, png, webp)
|
||||
--split-chapters Split video into multiple files based on
|
||||
internal chapters. The "chapter:" prefix
|
||||
can be used with "--paths" and "--output"
|
||||
@@ -1154,11 +1158,11 @@ # OUTPUT TEMPLATE
|
||||
- `license` (string): License name the video is licensed under
|
||||
- `creator` (string): The creator of the video
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date (YYYYMMDD)
|
||||
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
|
||||
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
|
||||
- `release_date` (string): The date (YYYYMMDD) when the video was released
|
||||
- `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
|
||||
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
|
||||
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified
|
||||
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `channel` (string): Full name of the channel the video is uploaded on
|
||||
- `channel_id` (string): Id of the channel
|
||||
@@ -1363,7 +1367,7 @@ # FORMAT SELECTION
|
||||
- `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
|
||||
- `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
|
||||
- `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
|
||||
- `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]`
|
||||
- `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
|
||||
- `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
|
||||
- `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
|
||||
- `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
|
||||
@@ -1371,7 +1375,7 @@ # FORMAT SELECTION
|
||||
- `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
|
||||
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
|
||||
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
|
||||
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
||||
|
||||
@@ -1637,7 +1641,11 @@ # Regex example
|
||||
# Set title as "Series name S01E05"
|
||||
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"
|
||||
|
||||
# Set "comment" field in video metadata using description instead of webpage_url
|
||||
# Prioritize uploader as the "artist" field in video metadata
|
||||
$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --add-metadata
|
||||
|
||||
# Set "comment" field in video metadata using description instead of webpage_url,
|
||||
# handling multiple lines correctly
|
||||
$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --add-metadata
|
||||
|
||||
# Remove "formats" field from the infojson by setting it to an empty string
|
||||
@@ -1650,20 +1658,18 @@ # Replace all spaces and "_" in title and uploader with a `-`
|
||||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player-client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
The following extractors use this feature:
|
||||
|
||||
#### youtube
|
||||
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients.
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (Eg: `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but tv_embedded and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`.
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total.
|
||||
* `max_comment_depth` Maximum depth for nested comments. YouTube supports depths 1 or 2 (default)
|
||||
* **Deprecated**: Set `max-replies` to `0` or `all` in `max_comments` instead (e.g. `max_comments=all,all,0` to get no replies)
|
||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
|
||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
||||
@@ -1677,7 +1683,7 @@ #### crunchyroll
|
||||
* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
|
||||
* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
|
||||
|
||||
#### crunchyroll:beta
|
||||
#### crunchyrollbeta
|
||||
* `format`: Which stream type(s) to extract. Default is `adaptive_hls` Eg: `crunchyrollbeta:format=vo_adaptive_hls`
|
||||
* Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
|
||||
* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
|
||||
@@ -1685,6 +1691,9 @@ #### crunchyroll:beta
|
||||
#### vikichannel
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
|
||||
#### niconico
|
||||
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**
|
||||
|
||||
#### youtubewebarchive
|
||||
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
|
||||
|
||||
@@ -1739,7 +1748,7 @@ # EMBEDDING YT-DLP
|
||||
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L191).
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L197).
|
||||
|
||||
Here's a more complete example demonstrating various functionality:
|
||||
|
||||
|
||||
@@ -24,10 +24,9 @@ def main():
|
||||
def gen_ies_md(ies):
|
||||
for ie in ies:
|
||||
ie_md = '**{0}**'.format(ie.IE_NAME)
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
if ie.IE_DESC is False:
|
||||
continue
|
||||
if ie_desc is not None:
|
||||
if ie.IE_DESC is not None:
|
||||
ie_md += ': {0}'.format(ie.IE_DESC)
|
||||
search_key = getattr(ie, 'SEARCH_KEY', None)
|
||||
if search_key is not None:
|
||||
|
||||
1
docs/.gitignore
vendored
1
docs/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
_build/
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
```{include} ../Changelog.md
|
||||
```
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
```{include} ../Collaborators.md
|
||||
```
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
```{include} ../Contributing.md
|
||||
```
|
||||
@@ -1,6 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
# LICENSE
|
||||
```{include} ../LICENSE
|
||||
```
|
||||
177
docs/Makefile
177
docs/Makefile
@@ -1,177 +0,0 @@
|
||||
# Makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
PAPER =
|
||||
BUILDDIR = _build
|
||||
|
||||
# User-friendly check for sphinx-build
|
||||
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
||||
endif
|
||||
|
||||
# Internal variables.
|
||||
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||
PAPEROPT_letter = -D latex_paper_size=letter
|
||||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
# the i18n builder cannot share the environment and doctrees with the others
|
||||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
|
||||
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
||||
|
||||
help:
|
||||
@echo "Please use \`make <target>' where <target> is one of"
|
||||
@echo " html to make standalone HTML files"
|
||||
@echo " dirhtml to make HTML files named index.html in directories"
|
||||
@echo " singlehtml to make a single large HTML file"
|
||||
@echo " pickle to make pickle files"
|
||||
@echo " json to make JSON files"
|
||||
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||
@echo " qthelp to make HTML files and a qthelp project"
|
||||
@echo " devhelp to make HTML files and a Devhelp project"
|
||||
@echo " epub to make an epub"
|
||||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
||||
@echo " text to make text files"
|
||||
@echo " man to make manual pages"
|
||||
@echo " texinfo to make Texinfo files"
|
||||
@echo " info to make Texinfo files and run them through makeinfo"
|
||||
@echo " gettext to make PO message catalogs"
|
||||
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||
@echo " xml to make Docutils-native XML files"
|
||||
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
||||
@echo " linkcheck to check all external links for integrity"
|
||||
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR)/*
|
||||
|
||||
html:
|
||||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||
|
||||
dirhtml:
|
||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||
|
||||
singlehtml:
|
||||
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||
|
||||
pickle:
|
||||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||
@echo
|
||||
@echo "Build finished; now you can process the pickle files."
|
||||
|
||||
json:
|
||||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||
@echo
|
||||
@echo "Build finished; now you can process the JSON files."
|
||||
|
||||
htmlhelp:
|
||||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||
|
||||
qthelp:
|
||||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/yt-dlp.qhcp"
|
||||
@echo "To view the help file:"
|
||||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/yt-dlp.qhc"
|
||||
|
||||
devhelp:
|
||||
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||
@echo
|
||||
@echo "Build finished."
|
||||
@echo "To view the help file:"
|
||||
@echo "# mkdir -p $$HOME/.local/share/devhelp/yt-dlp"
|
||||
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/yt-dlp"
|
||||
@echo "# devhelp"
|
||||
|
||||
epub:
|
||||
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||
@echo
|
||||
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||
|
||||
latex:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo
|
||||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||
"(use \`make latexpdf' here to do that automatically)."
|
||||
|
||||
latexpdf:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through pdflatex..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
latexpdfja:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through platex and dvipdfmx..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
text:
|
||||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||
@echo
|
||||
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||
|
||||
man:
|
||||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||
@echo
|
||||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||
|
||||
texinfo:
|
||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||
@echo
|
||||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||
"(use \`make info' here to do that automatically)."
|
||||
|
||||
info:
|
||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||
@echo "Running Texinfo files through makeinfo..."
|
||||
make -C $(BUILDDIR)/texinfo info
|
||||
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||
|
||||
gettext:
|
||||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||
@echo
|
||||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||
|
||||
changes:
|
||||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||
@echo
|
||||
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||
|
||||
linkcheck:
|
||||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||
@echo
|
||||
@echo "Link check complete; look for any errors in the above output " \
|
||||
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||
|
||||
doctest:
|
||||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||
@echo "Testing of doctests in the sources finished, look at the " \
|
||||
"results in $(BUILDDIR)/doctest/output.txt."
|
||||
|
||||
xml:
|
||||
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
||||
@echo
|
||||
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
||||
|
||||
pseudoxml:
|
||||
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
||||
@echo
|
||||
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
||||
@@ -1,2 +0,0 @@
|
||||
```{include} ../README.md
|
||||
```
|
||||
68
docs/conf.py
68
docs/conf.py
@@ -1,68 +0,0 @@
|
||||
# coding: utf-8
|
||||
#
|
||||
# yt-dlp documentation build configuration file
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Allows to import yt-dlp
|
||||
sys.path.insert(0, os.path.abspath('..'))
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
'myst_parser',
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'README'
|
||||
|
||||
# General information about the project.
|
||||
project = u'yt-dlp'
|
||||
author = u'yt-dlp'
|
||||
copyright = u'UNLICENSE'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
from yt_dlp.version import __version__
|
||||
version = __version__
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = version
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build']
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'default'
|
||||
|
||||
# Disable highlights
|
||||
highlight_language = 'none'
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
# html_static_path = ['_static']
|
||||
|
||||
# Enable heading anchors
|
||||
myst_heading_anchors = 4
|
||||
|
||||
# Suppress heading warnings
|
||||
suppress_warnings = [
|
||||
'myst.header',
|
||||
]
|
||||
@@ -1 +0,0 @@
|
||||
myst-parser
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
```{include} ../supportedsites.md
|
||||
```
|
||||
@@ -1,6 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
# ytdlp_plugins
|
||||
|
||||
See [https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins](https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins).
|
||||
@@ -74,7 +74,7 @@ def version_to_list(version):
|
||||
|
||||
|
||||
def dependency_options():
|
||||
dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
|
||||
dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi'] + collect_submodules('websockets')
|
||||
excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
|
||||
|
||||
yield from (f'--hidden-import={module}' for module in dependencies)
|
||||
|
||||
@@ -2,4 +2,5 @@ mutagen
|
||||
pycryptodomex
|
||||
websockets
|
||||
brotli; platform_python_implementation=='CPython'
|
||||
brotlicffi; platform_python_implementation!='CPython'
|
||||
brotlicffi; platform_python_implementation!='CPython'
|
||||
certifi
|
||||
@@ -42,6 +42,7 @@ # Supported sites
|
||||
- **aenetworks:show**
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:live**: afreecatv.com
|
||||
- **afreecatv:user**
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
- **AlJazeera**
|
||||
@@ -104,6 +105,8 @@ # Supported sites
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **BanBye**
|
||||
- **BanByeChannel**
|
||||
- **bandaichannel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
@@ -245,6 +248,7 @@ # Supported sites
|
||||
- **cpac:playlist**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Craftsy**
|
||||
- **CrooksAndLiars**
|
||||
- **CrowdBunker**
|
||||
- **CrowdBunkerChannel**
|
||||
@@ -263,6 +267,8 @@ # Supported sites
|
||||
- **curiositystream:collections**
|
||||
- **curiositystream:series**
|
||||
- **CWTV**
|
||||
- **Cybrary**
|
||||
- **CybraryCourse**
|
||||
- **Daftsex**
|
||||
- **DagelijkseKost**: dagelijksekost.een.be
|
||||
- **DailyMail**
|
||||
@@ -484,6 +490,7 @@ # Supported sites
|
||||
- **Hungama**
|
||||
- **HungamaAlbumPlaylist**
|
||||
- **HungamaSong**
|
||||
- **huya:live**: huya.com
|
||||
- **Hypem**
|
||||
- **ign.com**
|
||||
- **IGNArticle**
|
||||
@@ -512,6 +519,8 @@ # Supported sites
|
||||
- **iq.com**: International version of iQiyi
|
||||
- **iq.com:album**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **ITProTV**
|
||||
- **ITProTVCourse**
|
||||
- **ITTF**
|
||||
- **ITV**
|
||||
- **ITVBTCC**
|
||||
@@ -520,6 +529,8 @@ # Supported sites
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **Jable**
|
||||
- **JablePlaylist**
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**
|
||||
@@ -555,6 +566,9 @@ # Supported sites
|
||||
- **la7.it:podcast**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **LastFM**
|
||||
- **LastFMPlaylist**
|
||||
- **LastFMUser**
|
||||
- **lbry**
|
||||
- **lbry:channel**
|
||||
- **LCI**
|
||||
@@ -603,6 +617,7 @@ # Supported sites
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **MangoTV**: 芒果TV
|
||||
- **ManotoTV**: Manoto TV (Episode)
|
||||
- **ManotoTVLive**: Manoto TV (Live)
|
||||
- **ManotoTVShow**: Manoto TV (Show)
|
||||
@@ -635,7 +650,6 @@ # Supported sites
|
||||
- **Metacritic**
|
||||
- **mewatch**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
- **microsoftstream**: Microsoft Stream
|
||||
- **mildom**: Record ongoing live by specific user in Mildom
|
||||
@@ -671,6 +685,7 @@ # Supported sites
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviepilot**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MSN**
|
||||
@@ -705,7 +720,6 @@ # Supported sites
|
||||
- **MyVideoGe**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **N1Info:article**
|
||||
- **N1InfoAsset**
|
||||
@@ -863,6 +877,9 @@ # Supported sites
|
||||
- **PalcoMP3:song**
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **Panopto**
|
||||
- **PanoptoList**
|
||||
- **PanoptoPlaylist**
|
||||
- **ParamountNetwork**
|
||||
- **ParamountPlus**
|
||||
- **ParamountPlusSeries**
|
||||
@@ -912,6 +929,7 @@ # Supported sites
|
||||
- **PlutoTV**
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PokemonSoundLibrary**
|
||||
- **PokemonWatch**
|
||||
- **PokerGo**
|
||||
- **PokerGoCollection**
|
||||
@@ -957,8 +975,6 @@ # Supported sites
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Qub**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
- **R7Article**
|
||||
- **Radiko**
|
||||
@@ -1427,6 +1443,9 @@ # Supported sites
|
||||
- **Wakanim**
|
||||
- **Walla**
|
||||
- **WalyTV**
|
||||
- **wasdtv:clip**
|
||||
- **wasdtv:record**
|
||||
- **wasdtv:stream**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
@@ -1520,6 +1539,8 @@ # Supported sites
|
||||
- **Zapiks**
|
||||
- **Zattoo**
|
||||
- **ZattooLive**
|
||||
- **ZattooMovies**
|
||||
- **ZattooRecordings**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **Zee5**
|
||||
|
||||
@@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict):
|
||||
|
||||
def sanitize_got_info_dict(got_dict):
|
||||
IGNORED_FIELDS = (
|
||||
# Format keys
|
||||
'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution',
|
||||
'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize',
|
||||
'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference',
|
||||
'language', 'language_preference', 'quality', 'source_preference', 'http_headers',
|
||||
'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
|
||||
|
||||
# RTMP formats
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
|
||||
*YoutubeDL._format_fields,
|
||||
|
||||
# Lists
|
||||
'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
|
||||
|
||||
@@ -818,6 +818,8 @@ def expect_same_infodict(out):
|
||||
test('%(id&foo)s.bar', 'foo.bar')
|
||||
test('%(title&foo)s.bar', 'NA.bar')
|
||||
test('%(title&foo|baz)s.bar', 'baz.bar')
|
||||
test('%(x,id&foo|baz)s.bar', 'foo.bar')
|
||||
test('%(x,title&foo|baz)s.bar', 'baz.bar')
|
||||
|
||||
# Laziness
|
||||
def gen():
|
||||
@@ -931,7 +933,7 @@ def get_videos(filter_=None):
|
||||
res = get_videos()
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
def f(v):
|
||||
def f(v, incomplete):
|
||||
if v['id'] == '1':
|
||||
return None
|
||||
else:
|
||||
|
||||
@@ -12,11 +12,6 @@
|
||||
from yt_dlp.extractor import IqiyiIE
|
||||
|
||||
|
||||
class IqiyiIEWithCredentials(IqiyiIE):
|
||||
def _get_login_info(self):
|
||||
return 'foo', 'bar'
|
||||
|
||||
|
||||
class WarningLogger(object):
|
||||
def __init__(self):
|
||||
self.messages = []
|
||||
@@ -40,8 +35,8 @@ def test_iqiyi_sdk_interpreter(self):
|
||||
If `sign` is incorrect, /validate call throws an HTTP 556 error
|
||||
'''
|
||||
logger = WarningLogger()
|
||||
ie = IqiyiIEWithCredentials(FakeYDL({'logger': logger}))
|
||||
ie._login()
|
||||
ie = IqiyiIE(FakeYDL({'logger': logger}))
|
||||
ie._perform_login('foo', 'bar')
|
||||
self.assertTrue('unable to log in:' in logger.messages[0])
|
||||
|
||||
|
||||
|
||||
@@ -7,18 +7,19 @@
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
gen_extractors,
|
||||
)
|
||||
from yt_dlp.extractor import gen_extractor_classes
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
NO_LOGIN = InfoExtractor._perform_login
|
||||
|
||||
|
||||
class TestNetRc(unittest.TestCase):
|
||||
def test_netrc_present(self):
|
||||
for ie in gen_extractors():
|
||||
if not hasattr(ie, '_login'):
|
||||
for ie in gen_extractor_classes():
|
||||
if ie._perform_login is NO_LOGIN:
|
||||
continue
|
||||
self.assertTrue(
|
||||
hasattr(ie, '_NETRC_MACHINE'),
|
||||
ie._NETRC_MACHINE,
|
||||
'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
|
||||
|
||||
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
locked_file,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
@@ -160,10 +161,12 @@ def test_sanitize_filename(self):
|
||||
sanitize_filename('New World record at 0:12:34'),
|
||||
'New World record at 0_12_34')
|
||||
|
||||
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
|
||||
|
||||
forbidden = '"\0\\/'
|
||||
for fc in forbidden:
|
||||
@@ -625,6 +628,8 @@ def test_parse_duration(self):
|
||||
self.assertEqual(parse_duration('3h 11m 53s'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
|
||||
self.assertEqual(parse_duration('62m45s'), 3765)
|
||||
self.assertEqual(parse_duration('6m59s'), 419)
|
||||
self.assertEqual(parse_duration('49s'), 49)
|
||||
@@ -1780,6 +1785,7 @@ def test_format_bytes(self):
|
||||
self.assertEqual(format_bytes(1024**6), '1.00EiB')
|
||||
self.assertEqual(format_bytes(1024**7), '1.00ZiB')
|
||||
self.assertEqual(format_bytes(1024**8), '1.00YiB')
|
||||
self.assertEqual(format_bytes(1024**9), '1024.00YiB')
|
||||
|
||||
def test_hide_login_info(self):
|
||||
self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
|
||||
@@ -1790,6 +1796,36 @@ def test_hide_login_info(self):
|
||||
self.assertEqual(Config.hide_login_info(['--username=foo']),
|
||||
['--username=PRIVATE'])
|
||||
|
||||
def test_locked_file(self):
|
||||
TEXT = 'test_locked_file\n'
|
||||
FILE = 'test_locked_file.ytdl'
|
||||
MODES = 'war' # Order is important
|
||||
|
||||
try:
|
||||
for lock_mode in MODES:
|
||||
with locked_file(FILE, lock_mode, False) as f:
|
||||
if lock_mode == 'r':
|
||||
self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
|
||||
else:
|
||||
f.write(TEXT)
|
||||
for test_mode in MODES:
|
||||
testing_write = test_mode != 'r'
|
||||
try:
|
||||
with locked_file(FILE, test_mode, False):
|
||||
pass
|
||||
except (BlockingIOError, PermissionError):
|
||||
if not testing_write: # FIXME
|
||||
print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
|
||||
continue
|
||||
self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
|
||||
else:
|
||||
self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
|
||||
finally:
|
||||
try:
|
||||
os.remove(FILE)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -65,6 +65,7 @@
|
||||
ExistingVideoReached,
|
||||
expand_path,
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
format_field,
|
||||
@@ -72,6 +73,7 @@
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
get_domain,
|
||||
has_certifi,
|
||||
HEADRequest,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
@@ -86,6 +88,7 @@
|
||||
MaxDownloadsReached,
|
||||
merge_headers,
|
||||
network_exceptions,
|
||||
NO_DEFAULT,
|
||||
number_of_digits,
|
||||
orderedSet,
|
||||
OUTTMPL_TYPES,
|
||||
@@ -512,23 +515,22 @@ class YoutubeDL(object):
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
))
|
||||
|
||||
_format_fields = {
|
||||
# NB: Keep in sync with the docstring of extractor/common.py
|
||||
'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
|
||||
'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
|
||||
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
|
||||
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
|
||||
'preference', 'language', 'language_preference', 'quality', 'source_preference',
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
}
|
||||
_format_selection_exts = {
|
||||
'audio': {'m4a', 'mp3', 'ogg', 'aac'},
|
||||
'video': {'mp4', 'flv', 'webm', '3gp'},
|
||||
'storyboards': {'mhtml'},
|
||||
}
|
||||
|
||||
params = None
|
||||
_ies = {}
|
||||
_pps = {k: [] for k in POSTPROCESS_WHEN}
|
||||
_printed_messages = set()
|
||||
_first_webpage_request = True
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_playlist_level = 0
|
||||
_playlist_urls = set()
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params=None, auto_init=True):
|
||||
"""Create a FileDownloader object with the given options.
|
||||
@param auto_init Whether to load the default extractors and print header (if verbose).
|
||||
@@ -536,6 +538,7 @@ def __init__(self, params=None, auto_init=True):
|
||||
"""
|
||||
if params is None:
|
||||
params = {}
|
||||
self.params = params
|
||||
self._ies = {}
|
||||
self._ies_instances = {}
|
||||
self._pps = {k: [] for k in POSTPROCESS_WHEN}
|
||||
@@ -547,15 +550,21 @@ def __init__(self, params=None, auto_init=True):
|
||||
self._download_retcode = 0
|
||||
self._num_downloads = 0
|
||||
self._num_videos = 0
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
self._err_file = sys.stderr
|
||||
self.params = params
|
||||
self._playlist_level = 0
|
||||
self._playlist_urls = set()
|
||||
self.cache = Cache(self)
|
||||
|
||||
windows_enable_vt_mode()
|
||||
self._out_files = {
|
||||
'error': sys.stderr,
|
||||
'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
|
||||
'console': None if compat_os_name == 'nt' else next(
|
||||
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
|
||||
}
|
||||
self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
|
||||
self._allow_colors = {
|
||||
'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
|
||||
'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
|
||||
type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
|
||||
for type_ in ('screen', 'error')
|
||||
}
|
||||
|
||||
if sys.version_info < (3, 6):
|
||||
@@ -620,7 +629,7 @@ def check_deprecated(param, option, suggestion):
|
||||
sp_kwargs = dict(
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=slave,
|
||||
stderr=self._err_file)
|
||||
stderr=self._out_files['error'])
|
||||
try:
|
||||
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
|
||||
except OSError:
|
||||
@@ -788,14 +797,24 @@ def _write_string(self, message, out=None, only_once=False):
|
||||
self._printed_messages.add(message)
|
||||
write_string(message, out=out, encoding=self.params.get('encoding'))
|
||||
|
||||
def to_stdout(self, message, skip_eol=False, quiet=False):
|
||||
def to_stdout(self, message, skip_eol=False, quiet=None):
|
||||
"""Print message to stdout"""
|
||||
if quiet is not None:
|
||||
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
|
||||
self._write_string(
|
||||
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
self._out_files['print'])
|
||||
|
||||
def to_screen(self, message, skip_eol=False, quiet=None):
|
||||
"""Print message to screen if not in quiet mode"""
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
elif not quiet or self.params.get('verbose'):
|
||||
self._write_string(
|
||||
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
self._err_file if quiet else self._screen_file)
|
||||
return
|
||||
if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
|
||||
return
|
||||
self._write_string(
|
||||
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
self._out_files['screen'])
|
||||
|
||||
def to_stderr(self, message, only_once=False):
|
||||
"""Print message to stderr"""
|
||||
@@ -803,7 +822,12 @@ def to_stderr(self, message, only_once=False):
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
|
||||
self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
|
||||
|
||||
def _send_console_code(self, code):
|
||||
if compat_os_name == 'nt' or not self._out_files['console']:
|
||||
return
|
||||
self._write_string(code, self._out_files['console'])
|
||||
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
@@ -814,26 +838,18 @@ def to_console_title(self, message):
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
elif 'TERM' in os.environ:
|
||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||
else:
|
||||
self._send_console_code(f'\033]0;{message}\007')
|
||||
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
if not self.params.get('consoletitle') or self.params.get('simulate'):
|
||||
return
|
||||
if self.params.get('simulate'):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
self._write_string('\033[22;0t', self._screen_file)
|
||||
self._send_console_code('\033[22;0t') # Save the title on stack
|
||||
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
if not self.params.get('consoletitle') or self.params.get('simulate'):
|
||||
return
|
||||
if self.params.get('simulate'):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
self._write_string('\033[23;0t', self._screen_file)
|
||||
self._send_console_code('\033[23;0t') # Restore the title from stack
|
||||
|
||||
def __enter__(self):
|
||||
self.save_console_title()
|
||||
@@ -879,11 +895,6 @@ def trouble(self, message=None, tb=None, is_error=True):
|
||||
raise DownloadError(message, exc_info)
|
||||
self._download_retcode = 1
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
"""Print message to stdout if not in quiet mode"""
|
||||
self.to_stdout(
|
||||
message, skip_eol, quiet=self.params.get('quiet', False))
|
||||
|
||||
class Styles(Enum):
|
||||
HEADERS = 'yellow'
|
||||
EMPHASIS = 'light blue'
|
||||
@@ -907,11 +918,11 @@ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_enc
|
||||
|
||||
def _format_screen(self, *args, **kwargs):
|
||||
return self._format_text(
|
||||
self._screen_file, self._allow_colors['screen'], *args, **kwargs)
|
||||
self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
|
||||
|
||||
def _format_err(self, *args, **kwargs):
|
||||
return self._format_text(
|
||||
self._err_file, self._allow_colors['err'], *args, **kwargs)
|
||||
self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
|
||||
|
||||
def report_warning(self, message, only_once=False):
|
||||
'''
|
||||
@@ -927,7 +938,7 @@ def report_warning(self, message, only_once=False):
|
||||
|
||||
def deprecation_warning(self, message):
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning('DeprecationWarning: {message}')
|
||||
self.params['logger'].warning(f'DeprecationWarning: {message}')
|
||||
else:
|
||||
self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
|
||||
|
||||
@@ -1090,10 +1101,11 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
||||
(?P<fields>{field})
|
||||
(?P<maths>(?:{math_op}{math_field})*)
|
||||
(?:>(?P<strf_format>.+?))?
|
||||
(?P<alternate>(?<!\\),[^|&)]+)?
|
||||
(?:&(?P<replacement>.*?))?
|
||||
(?:\|(?P<default>.*?))?
|
||||
$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
|
||||
(?P<remaining>
|
||||
(?P<alternate>(?<!\\),[^|&)]+)?
|
||||
(?:&(?P<replacement>.*?))?
|
||||
(?:\|(?P<default>.*?))?
|
||||
)$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
|
||||
|
||||
def _traverse_infodict(k):
|
||||
k = k.split('.')
|
||||
@@ -1140,8 +1152,10 @@ def get_value(mdict):
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
|
||||
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
|
||||
return sanitize_filename(str(value), restricted=restricted,
|
||||
is_id=re.search(r'(^|[_.])id(\.|$)', key))
|
||||
return sanitize_filename(str(value), restricted=restricted, is_id=(
|
||||
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
||||
if 'filename-sanitization' in self.params.get('compat_opts', [])
|
||||
else NO_DEFAULT))
|
||||
|
||||
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
|
||||
sanitize = bool(sanitize)
|
||||
@@ -1164,7 +1178,7 @@ def create_key(outer_mobj):
|
||||
value = get_value(mobj)
|
||||
replacement = mobj['replacement']
|
||||
if value is None and mobj['alternate']:
|
||||
mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
|
||||
mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
|
||||
else:
|
||||
break
|
||||
|
||||
@@ -1226,18 +1240,21 @@ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
|
||||
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
|
||||
return self.escape_outtmpl(outtmpl) % info_dict
|
||||
|
||||
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
|
||||
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
|
||||
if outtmpl is None:
|
||||
outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
|
||||
try:
|
||||
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
|
||||
outtmpl = self._outtmpl_expandpath(outtmpl)
|
||||
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
|
||||
if not filename:
|
||||
return None
|
||||
|
||||
if tmpl_type in ('default', 'temp'):
|
||||
if tmpl_type in ('', 'temp'):
|
||||
final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
|
||||
if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
|
||||
filename = replace_extension(filename, ext, final_ext)
|
||||
else:
|
||||
elif tmpl_type:
|
||||
force_ext = OUTTMPL_TYPES[tmpl_type]
|
||||
if force_ext:
|
||||
filename = replace_extension(filename, force_ext, info_dict.get('ext'))
|
||||
@@ -1253,10 +1270,12 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def prepare_filename(self, info_dict, dir_type='', warn=False):
|
||||
"""Generate the output filename."""
|
||||
|
||||
filename = self._prepare_filename(info_dict, dir_type or 'default')
|
||||
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
|
||||
"""Generate the output filename"""
|
||||
if outtmpl:
|
||||
assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
|
||||
dir_type = None
|
||||
filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
|
||||
if not filename and dir_type not in ('', 'temp'):
|
||||
return ''
|
||||
|
||||
@@ -1430,7 +1449,7 @@ def progress(msg):
|
||||
min_wait, max_wait = self.params.get('wait_for_video')
|
||||
diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
|
||||
if diff is None and ie_result.get('live_status') == 'is_upcoming':
|
||||
diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
|
||||
diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
|
||||
self.report_warning('Release time of video is not known')
|
||||
elif (diff or 0) <= 0:
|
||||
self.report_warning('Video should already be available according to extracted info')
|
||||
@@ -1561,13 +1580,9 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
|
||||
if not info:
|
||||
return info
|
||||
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
|
||||
if f in force_properties:
|
||||
del force_properties[f]
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
new_result.update(filter_dict(ie_result, lambda k, v: (
|
||||
v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
|
||||
|
||||
# Extracted info may not be a video result (i.e.
|
||||
# info.get('_type', 'video') != video) but rather an url or
|
||||
@@ -1805,7 +1820,7 @@ def get_entry(i):
|
||||
ie_result['entries'] = playlist_results
|
||||
|
||||
# Write the updated info to json
|
||||
if _infojson_written and self._write_info_json(
|
||||
if _infojson_written is True and self._write_info_json(
|
||||
'updated playlist', ie_result,
|
||||
self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
|
||||
return
|
||||
@@ -2164,7 +2179,8 @@ def selector_function(ctx):
|
||||
yield from _check_formats(ctx['formats'][::-1])
|
||||
elif format_spec == 'mergeall':
|
||||
def selector_function(ctx):
|
||||
formats = list(_check_formats(ctx['formats']))
|
||||
formats = list(_check_formats(
|
||||
f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
|
||||
if not formats:
|
||||
return
|
||||
merged_format = formats[-1]
|
||||
@@ -2173,7 +2189,7 @@ def selector_function(ctx):
|
||||
yield merged_format
|
||||
|
||||
else:
|
||||
format_fallback, format_reverse, format_idx = False, True, 1
|
||||
format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
|
||||
mobj = re.match(
|
||||
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
|
||||
format_spec)
|
||||
@@ -2200,6 +2216,7 @@ def selector_function(ctx):
|
||||
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
|
||||
elif format_spec in self._format_selection_exts['video']:
|
||||
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
|
||||
seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
|
||||
elif format_spec in self._format_selection_exts['storyboards']:
|
||||
filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
|
||||
else:
|
||||
@@ -2208,11 +2225,15 @@ def selector_function(ctx):
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
|
||||
if format_fallback and ctx['incomplete_formats'] and not matches:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
matches = formats
|
||||
if not matches:
|
||||
if format_fallback and ctx['incomplete_formats']:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
matches = formats
|
||||
elif seperate_fallback and not ctx['has_merged_format']:
|
||||
# for compatibility with youtube-dl when there is no pre-merged format
|
||||
matches = list(filter(seperate_fallback, formats))
|
||||
matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
|
||||
try:
|
||||
yield matches[format_idx - 1]
|
||||
@@ -2446,6 +2467,11 @@ def sanitize_numeric_fields(info):
|
||||
info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
|
||||
if not self.params.get('allow_unplayable_formats'):
|
||||
formats = [f for f in formats if not f.get('has_drm')]
|
||||
if info_dict['__has_drm'] and all(
|
||||
f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
|
||||
self.report_warning(
|
||||
'This video is DRM protected and only images are available for download. '
|
||||
'Use --list-formats to see them')
|
||||
|
||||
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
|
||||
if not get_from_start:
|
||||
@@ -2453,8 +2479,9 @@ def sanitize_numeric_fields(info):
|
||||
if info_dict.get('is_live') and formats:
|
||||
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
|
||||
if get_from_start and not formats:
|
||||
self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
|
||||
'If you want to download from the current time, pass --no-live-from-start')
|
||||
self.raise_no_formats(info_dict, msg=(
|
||||
'--live-from-start is passed, but there are no formats that can be downloaded from the start. '
|
||||
'If you want to download from the current time, use --no-live-from-start'))
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats(info_dict)
|
||||
@@ -2542,7 +2569,7 @@ def is_wellformed(f):
|
||||
|
||||
info_dict, _ = self.pre_process(info_dict)
|
||||
|
||||
if self._match_entry(info_dict) is not None:
|
||||
if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
|
||||
return info_dict
|
||||
|
||||
self.post_extract(info_dict)
|
||||
@@ -2584,33 +2611,15 @@ def is_wellformed(f):
|
||||
self.report_error(err, tb=False, is_error=False)
|
||||
continue
|
||||
|
||||
# While in format selection we may need to have an access to the original
|
||||
# format set in order to calculate some metrics or do some processing.
|
||||
# For now we need to be able to guess whether original formats provided
|
||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||
# video-only or audio-only formats) for proper formats selection for
|
||||
# extractors with such incomplete formats (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/5556).
|
||||
# Since formats may be filtered during format selection and may not match
|
||||
# the original formats the results may be incorrect. Thus original formats
|
||||
# or pre-calculated metrics should be passed to format selection routines
|
||||
# as well.
|
||||
# We will pass a context object containing all necessary additional data
|
||||
# instead of just formats.
|
||||
# This fixes incorrect format selection issue (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10083).
|
||||
incomplete_formats = (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
|
||||
# all formats are audio-only
|
||||
or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||
|
||||
ctx = {
|
||||
formats_to_download = list(format_selector({
|
||||
'formats': formats,
|
||||
'incomplete_formats': incomplete_formats,
|
||||
}
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
|
||||
# all formats are audio-only
|
||||
or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
|
||||
}))
|
||||
if interactive_format_selection and not formats_to_download:
|
||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||
continue
|
||||
@@ -2618,8 +2627,9 @@ def is_wellformed(f):
|
||||
|
||||
if not formats_to_download:
|
||||
if not self.params.get('ignore_no_formats_error'):
|
||||
raise ExtractorError('Requested format is not available', expected=True,
|
||||
video_id=info_dict['id'], ie=info_dict['extractor'])
|
||||
raise ExtractorError(
|
||||
'Requested format is not available. Use --list-formats for a list of available formats',
|
||||
expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
|
||||
self.report_warning('Requested format is not available')
|
||||
# Process what we can, even without any available formats.
|
||||
formats_to_download = [{}]
|
||||
@@ -2661,9 +2671,10 @@ def is_wellformed(f):
|
||||
|
||||
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||
"""Select the requested subtitles and their format"""
|
||||
available_subs = {}
|
||||
available_subs, normal_sub_langs = {}, []
|
||||
if normal_subtitles and self.params.get('writesubtitles'):
|
||||
available_subs.update(normal_subtitles)
|
||||
normal_sub_langs = tuple(normal_subtitles.keys())
|
||||
if automatic_captions and self.params.get('writeautomaticsub'):
|
||||
for lang, cap_info in automatic_captions.items():
|
||||
if lang not in available_subs:
|
||||
@@ -2674,7 +2685,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||
available_subs):
|
||||
return None
|
||||
|
||||
all_sub_langs = available_subs.keys()
|
||||
all_sub_langs = tuple(available_subs.keys())
|
||||
if self.params.get('allsubtitles', False):
|
||||
requested_langs = all_sub_langs
|
||||
elif self.params.get('subtitleslangs', False):
|
||||
@@ -2699,10 +2710,10 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||
else:
|
||||
requested_langs.extend(current_langs)
|
||||
requested_langs = orderedSet(requested_langs)
|
||||
elif 'en' in available_subs:
|
||||
requested_langs = ['en']
|
||||
elif normal_sub_langs:
|
||||
requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
|
||||
else:
|
||||
requested_langs = [list(all_sub_langs)[0]]
|
||||
requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
|
||||
if requested_langs:
|
||||
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
|
||||
|
||||
@@ -2751,7 +2762,7 @@ def format_tmpl(tmpl):
|
||||
self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
|
||||
|
||||
for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
|
||||
filename = self.evaluate_outtmpl(file_tmpl, info_dict)
|
||||
filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
|
||||
tmpl = format_tmpl(tmpl)
|
||||
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
|
||||
if self._ensure_dir_exists(filename):
|
||||
@@ -2777,7 +2788,7 @@ def print_optional(field):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
# For RTMP URLs, also include the playpath
|
||||
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
||||
elif 'url' in info_dict:
|
||||
elif info_dict.get('url'):
|
||||
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||
|
||||
if (self.params.get('forcejson')
|
||||
@@ -2861,14 +2872,13 @@ def process_info(self, info_dict):
|
||||
|
||||
# Does nothing under normal operation - for backward compatibility of process_info
|
||||
self.post_extract(info_dict)
|
||||
self._num_downloads += 1
|
||||
|
||||
# info_dict['_filename'] needs to be set for backward compatibility
|
||||
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
|
||||
temp_filename = self.prepare_filename(info_dict, 'temp')
|
||||
files_to_move = {}
|
||||
|
||||
self._num_downloads += 1
|
||||
|
||||
# Forced printings
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
|
||||
|
||||
@@ -3604,7 +3614,7 @@ def get_encoding(stream):
|
||||
encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
get_encoding(self._screen_file), get_encoding(self._err_file),
|
||||
get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
|
||||
self.get_encoding())
|
||||
|
||||
logger = self.params.get('logger')
|
||||
@@ -3679,6 +3689,7 @@ def python_implementation():
|
||||
|
||||
lib_str = join_nonempty(
|
||||
compat_brotli and compat_brotli.__name__,
|
||||
has_certifi and 'certifi',
|
||||
compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
|
||||
SECRETSTORAGE_AVAILABLE and 'secretstorage',
|
||||
has_mutagen and 'mutagen',
|
||||
@@ -3770,7 +3781,7 @@ def get_encoding(self):
|
||||
return encoding
|
||||
|
||||
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
|
||||
''' Write infojson and returns True = written, False = skip, None = error '''
|
||||
''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
|
||||
if overwrite is None:
|
||||
overwrite = self.params.get('overwrites', True)
|
||||
if not self.params.get('writeinfojson'):
|
||||
@@ -3782,14 +3793,15 @@ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
|
||||
return None
|
||||
elif not overwrite and os.path.exists(infofn):
|
||||
self.to_screen(f'[info] {label.title()} metadata is already present')
|
||||
else:
|
||||
self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
|
||||
try:
|
||||
write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
|
||||
return None
|
||||
return True
|
||||
return 'exists'
|
||||
|
||||
self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
|
||||
try:
|
||||
write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
return True
|
||||
except (OSError, IOError):
|
||||
self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
|
||||
return None
|
||||
|
||||
def _write_description(self, label, ie_result, descfn):
|
||||
''' Write description and returns True = written, False = skip, None = error '''
|
||||
@@ -3860,9 +3872,12 @@ def _write_subtitles(self, info_dict, filename):
|
||||
sub_info['filepath'] = sub_filename
|
||||
ret.append((sub_filename, sub_filename_final))
|
||||
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
|
||||
msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
|
||||
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
|
||||
raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
|
||||
self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
|
||||
if not self.params.get('ignoreerrors'):
|
||||
self.report_error(msg)
|
||||
raise DownloadError(msg)
|
||||
self.report_warning(msg)
|
||||
return ret
|
||||
|
||||
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
|
||||
|
||||
@@ -94,9 +94,9 @@ def print_extractor_information(opts, urls):
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
if not ie.working():
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
if desc is False:
|
||||
if ie.IE_DESC is False:
|
||||
continue
|
||||
desc = ie.IE_DESC or ie.IE_NAME
|
||||
if getattr(ie, 'SEARCH_KEY', None) is not None:
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
@@ -198,10 +198,13 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
|
||||
validate_positive('requests sleep interval', opts.sleep_interval_requests)
|
||||
validate_positive('sleep interval', opts.sleep_interval)
|
||||
validate_positive('max sleep interval', opts.max_sleep_interval)
|
||||
if opts.max_sleep_interval is not None:
|
||||
if opts.sleep_interval is None:
|
||||
validate(
|
||||
opts.sleep_interval is not None, 'min sleep interval',
|
||||
opts.max_sleep_interval is None, 'min sleep interval',
|
||||
msg='{name} must be specified; use --min-sleep-interval')
|
||||
elif opts.max_sleep_interval is None:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
else:
|
||||
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
|
||||
|
||||
if opts.wait_for_video is not None:
|
||||
@@ -255,7 +258,7 @@ def parse_bytes(name, value):
|
||||
return numeric_limit
|
||||
|
||||
opts.ratelimit = parse_bytes('rate limit', opts.ratelimit)
|
||||
opts.ratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit)
|
||||
opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit)
|
||||
opts.min_filesize = parse_bytes('min filesize', opts.min_filesize)
|
||||
opts.max_filesize = parse_bytes('max filesize', opts.max_filesize)
|
||||
opts.buffersize = parse_bytes('buffer size', opts.buffersize)
|
||||
@@ -355,7 +358,6 @@ def metadataparser_actions(f):
|
||||
raise ValueError('unsupported geo-bypass country or ip-block')
|
||||
|
||||
opts.match_filter = match_filter_func(opts.match_filter)
|
||||
opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)
|
||||
|
||||
if opts.download_archive is not None:
|
||||
opts.download_archive = expand_path(opts.download_archive)
|
||||
@@ -377,7 +379,7 @@ def metadataparser_actions(f):
|
||||
'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
|
||||
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
|
||||
|
||||
# --(post-processor/downloader)-args without name
|
||||
# --(postprocessor/downloader)-args without name
|
||||
def report_args_compat(name, value, key1, key2=None):
|
||||
if key1 in value and key2 not in value:
|
||||
warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s')
|
||||
@@ -403,8 +405,8 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u
|
||||
setattr(opts, opt1, default)
|
||||
|
||||
# Conflicting options
|
||||
report_conflict('--date-after', 'dateafter', '--date', 'date', default=None)
|
||||
report_conflict('--date-before', 'datebefore', '--date', 'date', default=None)
|
||||
report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
|
||||
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
|
||||
report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl'))
|
||||
report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default'))
|
||||
report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo')
|
||||
@@ -443,6 +445,8 @@ def report_deprecation(val, old, new=None):
|
||||
# report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it
|
||||
|
||||
# Dependent options
|
||||
opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)
|
||||
|
||||
if opts.exec_before_dl_cmd:
|
||||
opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd
|
||||
|
||||
@@ -818,6 +822,7 @@ def _real_main(argv=None):
|
||||
if opts.dump_user_agent:
|
||||
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
|
||||
write_string(f'{ua}\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
if print_extractor_information(opts, all_urls):
|
||||
sys.exit(0)
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
compat_cookiejar_Cookie,
|
||||
)
|
||||
from .utils import (
|
||||
error_to_str,
|
||||
expand_path,
|
||||
Popen,
|
||||
YoutubeDLCookieJar,
|
||||
@@ -721,7 +722,7 @@ def _get_kwallet_network_wallet(logger):
|
||||
network_wallet = stdout.decode('utf-8').strip()
|
||||
logger.debug('NetworkWallet = "{}"'.format(network_wallet))
|
||||
return network_wallet
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
|
||||
return default_wallet
|
||||
|
||||
@@ -766,8 +767,8 @@ def _get_kwallet_password(browser_keyring_name, logger):
|
||||
if stdout[-1:] == b'\n':
|
||||
stdout = stdout[:-1]
|
||||
return stdout
|
||||
except BaseException as e:
|
||||
logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
|
||||
except Exception as e:
|
||||
logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
|
||||
return b''
|
||||
|
||||
|
||||
@@ -823,8 +824,8 @@ def _get_mac_keyring_password(browser_keyring_name, logger):
|
||||
if stdout[-1:] == b'\n':
|
||||
stdout = stdout[:-1]
|
||||
return stdout
|
||||
except BaseException as e:
|
||||
logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
|
||||
except Exception as e:
|
||||
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
format_bytes,
|
||||
LockingUnsupportedError,
|
||||
sanitize_open,
|
||||
shell_quote,
|
||||
timeconvert,
|
||||
@@ -159,7 +160,7 @@ def parse_bytes(bytestr):
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
|
||||
self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_stderr(message)
|
||||
@@ -234,7 +235,10 @@ def inner(self, *args, **kwargs):
|
||||
|
||||
@wrap_file_access('open', fatal=True)
|
||||
def sanitize_open(self, filename, open_mode):
|
||||
return sanitize_open(filename, open_mode)
|
||||
f, filename = sanitize_open(filename, open_mode)
|
||||
if not getattr(f, 'locked', None):
|
||||
self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
|
||||
return f, filename
|
||||
|
||||
@wrap_file_access('remove')
|
||||
def try_remove(self, filename):
|
||||
@@ -277,9 +281,9 @@ def _prepare_multiline_status(self, lines=1):
|
||||
elif self.ydl.params.get('logger'):
|
||||
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
|
||||
elif self.params.get('progress_with_newline'):
|
||||
self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
|
||||
self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
|
||||
else:
|
||||
self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
|
||||
self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
|
||||
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
|
||||
|
||||
def _finish_multiline_status(self):
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
)
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||
from ..utils import (
|
||||
classproperty,
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
@@ -73,17 +74,23 @@ def real_download(self, filename, info_dict):
|
||||
def get_basename(cls):
|
||||
return cls.__name__[:-2].lower()
|
||||
|
||||
@classproperty
|
||||
def EXE_NAME(cls):
|
||||
return cls.get_basename()
|
||||
|
||||
@property
|
||||
def exe(self):
|
||||
return self.get_basename()
|
||||
return self.EXE_NAME
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
if path:
|
||||
cls.exe = path
|
||||
return path
|
||||
return False
|
||||
path = check_executable(
|
||||
cls.EXE_NAME if path in (None, cls.get_basename()) else path,
|
||||
[cls.AVAILABLE_OPT])
|
||||
if not path:
|
||||
return False
|
||||
cls.exe = path
|
||||
return path
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
@@ -106,7 +113,7 @@ def _valueless_option(self, command_option, param, expected_value=True):
|
||||
|
||||
def _configuration_args(self, keys=None, *args, **kwargs):
|
||||
return _configuration_args(
|
||||
self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
|
||||
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
|
||||
keys, *args, **kwargs)
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
@@ -169,7 +176,7 @@ class CurlFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
@@ -219,7 +226,7 @@ class WgetFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
@@ -230,7 +237,10 @@ def _make_cmd(self, tmpfilename, info_dict):
|
||||
retry[1] = '0'
|
||||
cmd += retry
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
for var in ('http_proxy', 'https_proxy'):
|
||||
cmd += ['--execute', '%s=%s' % (var, proxy)]
|
||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
@@ -303,10 +313,7 @@ def _make_cmd(self, tmpfilename, info_dict):
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
return super().available(path or 'http')
|
||||
EXE_NAME = 'http'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
@@ -507,11 +514,13 @@ class AVconvFD(FFmpegFD):
|
||||
pass
|
||||
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
_BY_NAME = {
|
||||
klass.get_basename(): klass
|
||||
for name, klass in globals().items()
|
||||
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
|
||||
)
|
||||
}
|
||||
|
||||
_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
|
||||
|
||||
|
||||
def list_external_downloaders():
|
||||
@@ -523,4 +532,4 @@ def get_external_downloader(external_downloader):
|
||||
downloader . """
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME.get(bn)
|
||||
return _BY_NAME.get(bn, _BY_EXE.get(bn))
|
||||
|
||||
@@ -133,19 +133,19 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
|
||||
}
|
||||
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||
if not success:
|
||||
return False, None
|
||||
return False
|
||||
if fragment_info_dict.get('filetime'):
|
||||
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||
ctx['fragment_filename_sanitized'] = fragment_filename
|
||||
try:
|
||||
return True, self._read_fragment(ctx)
|
||||
except FileNotFoundError:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
return False, None
|
||||
return True
|
||||
|
||||
def _read_fragment(self, ctx):
|
||||
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
|
||||
try:
|
||||
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
|
||||
except FileNotFoundError:
|
||||
if ctx.get('live'):
|
||||
return None
|
||||
raise
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
@@ -403,7 +403,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
def bindoj_result(future):
|
||||
def future_result(future):
|
||||
while True:
|
||||
try:
|
||||
return future.result(0.1)
|
||||
@@ -412,7 +412,7 @@ def bindoj_result(future):
|
||||
except concurrent.futures.TimeoutError:
|
||||
continue
|
||||
else:
|
||||
def bindoj_result(future):
|
||||
def future_result(future):
|
||||
return future.result()
|
||||
|
||||
def interrupt_trigger_iter(fg):
|
||||
@@ -430,7 +430,7 @@ def interrupt_trigger_iter(fg):
|
||||
result = True
|
||||
for tpe, job in spins:
|
||||
try:
|
||||
result = result and bindoj_result(job)
|
||||
result = result and future_result(job)
|
||||
except KeyboardInterrupt:
|
||||
interrupt_trigger[0] = False
|
||||
finally:
|
||||
@@ -457,7 +457,7 @@ def download_and_append_fragments(
|
||||
|
||||
def download_fragment(fragment, ctx):
|
||||
if not interrupt_trigger[0]:
|
||||
return False, fragment['frag_index']
|
||||
return
|
||||
|
||||
frag_index = ctx['fragment_index'] = fragment['frag_index']
|
||||
ctx['last_error'] = None
|
||||
@@ -467,14 +467,12 @@ def download_fragment(fragment, ctx):
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
|
||||
# Never skip the first fragment
|
||||
fatal = is_fatal(fragment.get('index') or (frag_index - 1))
|
||||
count, frag_content = 0, None
|
||||
fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
if self._download_fragment(ctx, fragment['url'], info_dict, headers):
|
||||
break
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
@@ -491,25 +489,19 @@ def download_fragment(fragment, ctx):
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
return False, frag_index
|
||||
if count > fragment_retries and fatal:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
return frag_content, frag_index
|
||||
|
||||
def append_fragment(frag_content, frag_index, ctx):
|
||||
if not frag_content:
|
||||
if not is_fatal(frag_index - 1):
|
||||
self.report_skip_fragment(frag_index, 'fragment not found')
|
||||
return True
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
self._append_fragment(ctx, pack_func(frag_content, frag_index))
|
||||
if frag_content:
|
||||
self._append_fragment(ctx, pack_func(frag_content, frag_index))
|
||||
elif not is_fatal(frag_index - 1):
|
||||
self.report_skip_fragment(frag_index, 'fragment not found')
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(f'fragment {frag_index} not found, unable to continue')
|
||||
return False
|
||||
return True
|
||||
|
||||
decrypt_fragment = self.decrypter(info_dict)
|
||||
@@ -520,23 +512,23 @@ def append_fragment(frag_content, frag_index, ctx):
|
||||
|
||||
def _download_fragment(fragment):
|
||||
ctx_copy = ctx.copy()
|
||||
frag_content, frag_index = download_fragment(fragment, ctx_copy)
|
||||
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
|
||||
download_fragment(fragment, ctx_copy)
|
||||
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
|
||||
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
ctx['fragment_filename_sanitized'] = frag_filename
|
||||
ctx['fragment_index'] = frag_index
|
||||
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
|
||||
result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments:
|
||||
if not interrupt_trigger[0]:
|
||||
break
|
||||
frag_content, frag_index = download_fragment(fragment, ctx)
|
||||
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
|
||||
download_fragment(fragment, ctx)
|
||||
result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import os
|
||||
import socket
|
||||
import ssl
|
||||
import time
|
||||
import random
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_http_client
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
@@ -18,11 +17,14 @@
|
||||
parse_http_range,
|
||||
sanitized_Request,
|
||||
ThrottledDownload,
|
||||
try_call,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
)
|
||||
|
||||
RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
@@ -53,11 +55,8 @@ class DownloadContext(dict):
|
||||
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
ctx.data_len = None
|
||||
ctx.block_size = self.params.get('buffersize', 1024)
|
||||
ctx.start_time = time.time()
|
||||
ctx.chunk_size = None
|
||||
throttle_start = None
|
||||
|
||||
# parse given Range
|
||||
req_start, req_end, _ = parse_http_range(headers.get('Range'))
|
||||
@@ -83,12 +82,6 @@ def __init__(self, source_error):
|
||||
class NextFragment(Exception):
|
||||
pass
|
||||
|
||||
def set_range(req, start, end):
|
||||
range_header = 'bytes=%d-' % start
|
||||
if end:
|
||||
range_header += compat_str(end)
|
||||
req.add_header('Range', range_header)
|
||||
|
||||
def establish_connection():
|
||||
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
|
||||
if not is_test and chunk_size else chunk_size)
|
||||
@@ -100,6 +93,8 @@ def establish_connection():
|
||||
if ctx.is_resume:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
elif req_start is not None:
|
||||
range_start = req_start
|
||||
elif ctx.chunk_size > 0:
|
||||
range_start = 0
|
||||
else:
|
||||
@@ -116,23 +111,21 @@ def establish_connection():
|
||||
else:
|
||||
range_end = None
|
||||
|
||||
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
|
||||
range_end = ctx.data_len - 1
|
||||
has_range = range_start is not None
|
||||
ctx.has_range = has_range
|
||||
if try_call(lambda: range_start > range_end):
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
|
||||
|
||||
if try_call(lambda: range_end >= ctx.content_len):
|
||||
range_end = ctx.content_len - 1
|
||||
|
||||
request = sanitized_Request(url, request_data, headers)
|
||||
has_range = range_start is not None
|
||||
if has_range:
|
||||
set_range(request, range_start, range_end)
|
||||
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
|
||||
# Establish connection
|
||||
try:
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
except (compat_urllib_error.URLError, ) as err:
|
||||
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||
reason = getattr(err, 'reason', None)
|
||||
if isinstance(reason, socket.timeout):
|
||||
raise RetryDownload(err)
|
||||
raise err
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
@@ -151,7 +144,9 @@ def establish_connection():
|
||||
or content_range_end == range_end
|
||||
or content_len < range_end)
|
||||
if accept_content_len:
|
||||
ctx.data_len = content_len
|
||||
ctx.content_len = content_len
|
||||
if content_len or req_end:
|
||||
ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
@@ -159,8 +154,7 @@ def establish_connection():
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
return
|
||||
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
@@ -202,16 +196,16 @@ def establish_connection():
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except socket.timeout as err:
|
||||
except compat_urllib_error.URLError as err:
|
||||
if isinstance(err.reason, ssl.CertificateError):
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
|
||||
# Any errors that occur during this will not be wrapped by URLError
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||
# Connection reset is no problem, just retry
|
||||
raise RetryDownload(err)
|
||||
raise
|
||||
|
||||
def download():
|
||||
nonlocal throttle_start
|
||||
data_len = ctx.data.info().get('Content-length', None)
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
@@ -254,16 +248,8 @@ def retry(e):
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
# socket.timeout is a subclass of socket.error but may not have
|
||||
# errno set
|
||||
except socket.timeout as e:
|
||||
retry(e)
|
||||
except socket.error as e:
|
||||
# SSLError on python 2 (inherits socket.error) may have
|
||||
# no errno set but this error message
|
||||
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
|
||||
retry(e)
|
||||
raise
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
retry(err)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
||||
@@ -334,16 +320,16 @@ def retry(e):
|
||||
if speed and speed < (self.params.get('throttledratelimit') or 0):
|
||||
# The speed must stay below the limit for 3 seconds
|
||||
# This prevents raising error when the speed temporarily goes down
|
||||
if throttle_start is None:
|
||||
throttle_start = now
|
||||
elif now - throttle_start > 3:
|
||||
if ctx.throttle_start is None:
|
||||
ctx.throttle_start = now
|
||||
elif now - ctx.throttle_start > 3:
|
||||
if ctx.stream is not None and ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
raise ThrottledDownload()
|
||||
elif speed:
|
||||
throttle_start = None
|
||||
ctx.throttle_start = None
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
@@ -263,9 +263,11 @@ def real_download(self, filename, info_dict):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
success = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
frag_content = self._read_fragment(ctx)
|
||||
|
||||
if not extra_state['ism_track_written']:
|
||||
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
|
||||
@@ -166,10 +166,15 @@ def real_download(self, filename, info_dict):
|
||||
if (i + 1) <= ctx['fragment_index']:
|
||||
continue
|
||||
|
||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
fragment_url = fragment.get('url')
|
||||
if not fragment_url:
|
||||
assert fragment_base_url
|
||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||
|
||||
success = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
continue
|
||||
frag_content = self._read_fragment(ctx)
|
||||
|
||||
mime_type = b'image/jpeg'
|
||||
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
|
||||
|
||||
@@ -22,7 +22,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||
if not self.params.get('skip_download'):
|
||||
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
|
||||
self.report_warning('Live chat download runs until the livestream ends. '
|
||||
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
|
||||
|
||||
@@ -115,9 +115,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, raw_fragment = dl_fragment(url, request_data, headers)
|
||||
success = dl_fragment(url, request_data, headers)
|
||||
if not success:
|
||||
return False, None, None, None
|
||||
raw_fragment = self._read_fragment(ctx)
|
||||
try:
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
@@ -145,9 +146,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
|
||||
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
success, raw_fragment = dl_fragment(info_dict['url'])
|
||||
success = dl_fragment(info_dict['url'])
|
||||
if not success:
|
||||
return False
|
||||
raw_fragment = self._read_fragment(ctx)
|
||||
try:
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
|
||||
@@ -291,15 +291,7 @@ def _get_media_token(self, invalidate=False, to_show=True):
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
# No authentication to be performed
|
||||
if not username:
|
||||
return True
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
|
||||
@@ -126,10 +126,7 @@ def _get_subtitles(self, sub_url, video_id):
|
||||
}])
|
||||
return subtitles
|
||||
|
||||
def _real_initialize(self):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
return
|
||||
def _perform_login(self, username, password):
|
||||
try:
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
|
||||
@@ -14,7 +14,7 @@ class AdobeConnectIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
|
||||
@@ -1650,21 +1650,27 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
hidden_data = self._hidden_inputs(first_bookend_page)
|
||||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page = self._download_webpage(
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_tryauth_url = self._html_search_regex(
|
||||
r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
|
||||
provider_tryauth_page = self._download_webpage(
|
||||
provider_tryauth_url, video_id, 'Submitting TryAuth',
|
||||
query=hidden_data)
|
||||
# Some website partners seem to not have the extra ajaxurl redirect step, so we check if we already
|
||||
# have the login prompt or not
|
||||
if 'id="password" type="password" name="password"' in provider_login_redirect_page:
|
||||
provider_login_page_res = provider_login_redirect_page_res
|
||||
else:
|
||||
provider_tryauth_url = self._html_search_regex(
|
||||
r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
|
||||
provider_tryauth_page = self._download_webpage(
|
||||
provider_tryauth_url, video_id, 'Submitting TryAuth',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
|
||||
video_id, 'Getting Login Page',
|
||||
query=hidden_data)
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
|
||||
video_id, 'Getting Login Page',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
provider_login_page_res, 'Logging in', {
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_xpath
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
date_from_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
@@ -32,7 +34,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/PLAYER/STATION/
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -170,6 +172,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/player/15055030',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -181,14 +186,7 @@ def parse_video_key(key):
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = {
|
||||
'szWork': 'login',
|
||||
'szType': 'json',
|
||||
@@ -486,3 +484,57 @@ def _real_extract(self, url):
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
    """Playlist extractor for the VOD listing pages of an AfreecaTV station.

    The URL's ``id`` group is the station/user id and the optional
    ``slug_type`` group selects the listing category; when it is absent the
    category ``'all'`` is used.
    """
    IE_NAME = 'afreecatv:user'
    _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
    _TESTS = [{
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - review',
        },
        'playlist_count': 218,
    }, {
        'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
        'info_dict': {
            '_type': 'playlist',
            'id': 'parang1995',
            'title': 'parang1995 - highlight',
        },
        'playlist_count': 997,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - all',
        },
        'playlist_count': 221,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - balloonclip',
        },
        'playlist_count': 0,
    }]
    # Number of entries requested per API page; also handed to the paged list.
    _PER_PAGE = 60

    def _fetch_page(self, user_id, user_type, page):
        """Yield one ``url_result`` per item on a single page of the listing API.

        ``page`` arrives zero-based and is incremented before being sent to
        the API and before being shown in the progress note.
        """
        page += 1
        api_url = f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}'
        api_query = {'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}
        listing = self._download_json(
            api_url, user_id,
            note=f'Downloading {user_type} video page {page}',
            query=api_query)
        for item in listing['data']:
            yield self.url_result(
                f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])

    def _real_extract(self, url):
        """Build a lazily paged playlist titled ``'<user_id> - <user_type>'``."""
        mobj = self._match_valid_url(url)
        user_id = mobj.group('id')
        # No category in the URL means the combined listing.
        user_type = mobj.group('slug_type') or 'all'
        page_fetcher = functools.partial(self._fetch_page, user_id, user_type)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PER_PAGE),
            user_id, f'{user_id} - {user_type}')
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
@@ -102,10 +103,7 @@ def _real_extract(self, url):
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||
' - AlloCiné')
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
||||
@@ -74,14 +74,7 @@ def _real_extract(self, url):
|
||||
"formats": formats
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
pass
|
||||
def _perform_login(self, username, password):
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
@@ -15,25 +15,21 @@
|
||||
|
||||
|
||||
class AnimeLabBaseIE(InfoExtractor):
|
||||
_LOGIN_REQUIRED = True
|
||||
_LOGIN_URL = 'https://www.animelab.com/login'
|
||||
_NETRC_MACHINE = 'animelab'
|
||||
_LOGGED_IN = False
|
||||
|
||||
def _login(self):
|
||||
def is_logged_in(login_webpage):
|
||||
return 'Sign In' not in login_webpage
|
||||
def _is_logged_in(self, login_page=None):
|
||||
if not self._LOGGED_IN:
|
||||
if not login_page:
|
||||
login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
|
||||
AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
|
||||
return self._LOGGED_IN
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
# Check if already logged in
|
||||
if is_logged_in(login_page):
|
||||
def _perform_login(self, username, password):
|
||||
if self._is_logged_in():
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None and self._LOGIN_REQUIRED:
|
||||
self.raise_login_required('Login is required to access any AnimeLab content')
|
||||
|
||||
login_form = {
|
||||
'email': username,
|
||||
'password': password,
|
||||
@@ -47,17 +43,14 @@ def is_logged_in(login_webpage):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
|
||||
else:
|
||||
raise
|
||||
raise
|
||||
|
||||
# if login was successful
|
||||
if is_logged_in(response):
|
||||
return
|
||||
|
||||
raise ExtractorError('Unable to login (cannot verify if logged in)')
|
||||
if not self._is_logged_in(response):
|
||||
raise ExtractorError('Unable to login (cannot verify if logged in)')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
if not self._is_logged_in():
|
||||
self.raise_login_required('Login is required to access any AnimeLab content')
|
||||
|
||||
|
||||
class AnimeLabIE(AnimeLabBaseIE):
|
||||
|
||||
@@ -53,11 +53,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
@@ -93,9 +89,6 @@ def _login(self):
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
anime_id = self._match_id(url)
|
||||
|
||||
|
||||
@@ -457,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note='Downloading CDX API JSON'):
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@@ -468,7 +468,9 @@ def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = Non
|
||||
'collapse': collapse or [],
|
||||
**(query or {})
|
||||
}
|
||||
res = self._download_json('https://web.archive.org/cdx/search/cdx', item_id, note, query=query)
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
@@ -481,8 +483,7 @@ def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
|
||||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
||||
|
||||
def _extract_webpage_title(self, webpage):
|
||||
page_title = self._html_search_regex(
|
||||
r'<title>([^<]*)</title>', webpage, 'title', default='')
|
||||
page_title = self._html_extract_title(webpage, default='')
|
||||
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
|
||||
return self._html_search_regex(
|
||||
r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
|
||||
|
||||
@@ -138,6 +138,7 @@ def _real_extract(self, url):
|
||||
break
|
||||
else:
|
||||
lang_pref = -1
|
||||
format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle'))
|
||||
|
||||
media_type = f.get('mediaType')
|
||||
if media_type == 'hls':
|
||||
@@ -145,14 +146,17 @@ def _real_extract(self, url):
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['language_preference'] = lang_pref
|
||||
m3u8_format.update({
|
||||
'language_preference': lang_pref,
|
||||
'format_note': format_note,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'language_preference': lang_pref,
|
||||
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
||||
'format_note': format_note,
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
|
||||
@@ -181,8 +181,7 @@ def _real_extract(self, url):
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
|
||||
@@ -37,9 +37,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
@@ -48,11 +45,7 @@ def _handle_error(self, e, code):
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
|
||||
@@ -11,11 +11,12 @@ class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:www\.|tv\.)?
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
telem1\.ch|
|
||||
tvo-online\.ch
|
||||
)/
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
@@ -30,7 +31,7 @@ class AZMedienIE(InfoExtractor):
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'url': 'https://tv.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
@@ -38,6 +39,9 @@ class AZMedienIE(InfoExtractor):
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
|
||||
'duration': 1930
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
153
yt_dlp/extractor/banbye.py
Normal file
153
yt_dlp/extractor/banbye.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BanByeBaseIE(InfoExtractor):
    """Shared endpoints and playlist helpers for the BanBye extractors."""
    _API_BASE = 'https://api.banbye.com'
    _CDN_BASE = 'https://cdn.banbye.com'
    _VIDEO_BASE = 'https://banbye.com/watch'

    @staticmethod
    def _extract_playlist_id(url, param='playlist'):
        """Return the first value of query parameter ``param`` in ``url``.

        Returns None when the parameter is not present in the parsed query.
        """
        parsed_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        values = parsed_query.get(param, [None])
        return values[0]

    def _extract_playlist(self, playlist_id):
        """Fetch playlist metadata from the API and return a playlist result.

        Each id in the response's ``videoIds`` becomes a ``url_result``
        pointing at the watch page; the playlist title is the response's
        ``name`` field (may be None).
        """
        data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
        entries = [
            self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
            for video_id in data['videoIds']
        ]
        return self.playlist_result(entries, playlist_id, data.get('name'))
|
||||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
    """Extractor for single BanBye videos (``/watch/<id>`` pages).

    When the URL carries a ``playlistId`` query parameter and
    ``_yes_playlist`` approves, the referenced playlist is returned instead
    of the single video.
    """
    # The dot in the host name is escaped so it matches only a literal '.';
    # unescaped, it would accept any character in that position.
    _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
        'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
        'info_dict': {
            'id': 'v_ytfmvkVYLE8T',
            'ext': 'mp4',
            'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
            'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
            'uploader': 'wRealu24',
            'channel_id': 'ch_wrealu24',
            'channel_url': 'https://banbye.com/channel/ch_wrealu24',
            'timestamp': 1647604800,
            'upload_date': '20220318',
            'duration': 1931,
            'thumbnail': r're:https?://.*\.webp',
            'tags': 'count:5',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
    }]

    def _real_extract(self, url):
        """Return the info dict for one video, or a playlist result.

        Video metadata comes from ``{_API_BASE}/videos/<id>``; format and
        thumbnail URLs are built against ``_CDN_BASE``.
        """
        video_id = self._match_id(url)
        # On watch pages the playlist is referenced as ``playlistId``.
        playlist_id = self._extract_playlist_id(url, 'playlistId')

        if self._yes_playlist(playlist_id, video_id):
            return self._extract_playlist(playlist_id)

        data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)
        # Thumbnails are generated for a fixed list of sizes, without
        # consulting the API response.
        thumbnails = [{
            'id': f'{quality}p',
            'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
        } for quality in [48, 96, 144, 240, 512, 1080]]
        # One HTTP format per entry of the API's ``quality`` list.
        formats = [{
            'format_id': f'http-{quality}p',
            'quality': quality,
            'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
        } for quality in data['quality']]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': data.get('title'),
            'description': data.get('desc'),
            'uploader': traverse_obj(data, ('channel', 'name')),
            'channel_id': data.get('channelId'),
            'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'),
            'timestamp': unified_timestamp(data.get('publishedAt')),
            'duration': data.get('duration'),
            'tags': data.get('tags'),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': data.get('likes'),
            'dislike_count': data.get('dislikes'),
            'view_count': data.get('views'),
            'comment_count': data.get('commentCount'),
        }
|
||||
|
||||
|
||||
class BanByeChannelIE(BanByeBaseIE):
    # Extractor for BanBye channel URLs. When _extract_playlist_id finds a
    # playlist ID in the URL, that playlist is extracted instead of the channel.
    # The dot of the host name is escaped so that only a literal "banbye.com"
    # matches (an unescaped "." would accept any character in that position).
    _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?channel/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://banbye.com/channel/ch_wrealu24',
        'info_dict': {
            'title': 'wRealu24',
            'id': 'ch_wrealu24',
            'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
        },
        'playlist_mincount': 791,
    }, {
        'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
    }]
    # Number of videos requested per API call ('limit' query parameter).
    _PAGE_SIZE = 100

    def _real_extract(self, url):
        """Return a playlist result for the channel (or URL-selected playlist).

        The channel metadata is downloaded from
        ``{_API_BASE}/channels/{channel_id}``; its ``videoCount`` determines
        how many pages of ``_PAGE_SIZE`` videos the paged list will request.
        Raises KeyError if ``videoCount`` is missing from the channel metadata.
        """
        channel_id = self._match_id(url)
        playlist_id = self._extract_playlist_id(url)

        if playlist_id:
            return self._extract_playlist(playlist_id)

        def page_func(page_num):
            """Return url_result entries for the 0-based page ``page_num``."""
            data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={
                'channelId': channel_id,
                'sort': 'new',
                'limit': self._PAGE_SIZE,
                'offset': page_num * self._PAGE_SIZE,
            }, note=f'Downloading page {page_num+1}')
            return [
                self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
                for video in data['items']
            ]

        channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id)
        # The page count is known up front from videoCount, rounded up so a
        # final partial page is still requested.
        entries = InAdvancePagedList(
            page_func,
            math.ceil(channel_data['videoCount'] / self._PAGE_SIZE),
            self._PAGE_SIZE)

        return self.playlist_result(
            entries, channel_id, channel_data.get('name'), channel_data.get('description'))
|
||||
@@ -264,11 +264,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading signin page')
|
||||
|
||||
@@ -294,9 +290,6 @@ def _login(self):
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
@@ -913,9 +906,8 @@ def _real_extract(self, url):
|
||||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
if not playlist_title:
|
||||
playlist_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
|
||||
playlist_title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_extract_title(webpage, 'playlist title', default=None))
|
||||
if playlist_title:
|
||||
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
@@ -50,7 +51,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'md5': '7ac275ec84a99a6552c5d229659a0fe1',
|
||||
'info_dict': {
|
||||
'id': '1074402_part1',
|
||||
'ext': 'mp4',
|
||||
@@ -60,6 +61,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
'upload_date': '20140420',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'timestamp': 1398012678,
|
||||
'tags': ['顶上去报复社会', '该来的总会来的', '金克拉是检验歌曲的唯一标准', '坷垃教主', '金坷垃', '邓紫棋', '治愈系坷垃'],
|
||||
'bv_id': 'BV11x411K7CN',
|
||||
'cid': '1554319',
|
||||
'thumbnail': 'http://i2.hdslb.com/bfs/archive/c79a8cf0347cd7a897c53a2f756e96aead128e8c.jpg',
|
||||
'duration': 308.36,
|
||||
},
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
@@ -90,6 +96,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
'timestamp': 1488382634,
|
||||
'uploader_id': '65880958',
|
||||
'uploader': '阿滴英文',
|
||||
'thumbnail': 'http://i2.hdslb.com/bfs/archive/49267ce20bc246be6304bf369a3ded0256854c23.jpg',
|
||||
'cid': '14694589',
|
||||
'duration': 554.117,
|
||||
'bv_id': 'BV13x41117TL',
|
||||
'tags': ['人文', '英语', '文化', '公开课', '阿滴英文'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -106,6 +117,27 @@ class BiliBiliIE(InfoExtractor):
|
||||
'title': '物语中的人物是如何吐槽自己的OP的'
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}, {
|
||||
# Correct matching of single and double quotes in title
|
||||
'url': 'https://www.bilibili.com/video/BV1NY411E7Rx/',
|
||||
'info_dict': {
|
||||
'id': '255513412_part1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vid"eo" Te\'st',
|
||||
'cid': '570602418',
|
||||
'thumbnail': 'http://i2.hdslb.com/bfs/archive/0c0de5a90b6d5b991b8dcc6cde0afbf71d564791.jpg',
|
||||
'upload_date': '20220408',
|
||||
'timestamp': 1649436552,
|
||||
'description': 'Vid"eo" Te\'st',
|
||||
'uploader_id': '1630758804',
|
||||
'bv_id': 'BV1NY411E7Rx',
|
||||
'duration': 60.394,
|
||||
'uploader': 'bili_31244483705',
|
||||
'tags': ['VLOG'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
@@ -257,7 +289,8 @@ def _real_extract(self, url):
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex((
|
||||
r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
|
||||
r'<h1[^>]+title=(["])(?P<content>[^"]+)',
|
||||
r'<h1[^>]+title=([\'])(?P<content>[^\']+)',
|
||||
r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
|
||||
self._meta_regex('title')
|
||||
), webpage, 'title', group='content', fatal=False)
|
||||
@@ -755,15 +788,21 @@ def json2srt(self, json):
|
||||
for i, line in enumerate(json['body']) if line.get('content'))
|
||||
return data
|
||||
|
||||
def _get_subtitles(self, ep_id):
|
||||
sub_json = self._call_api(f'/web/v2/subtitle?episode_id={ep_id}&platform=web', ep_id)
|
||||
def _get_subtitles(self, *, ep_id=None, aid=None):
|
||||
sub_json = self._call_api(
|
||||
'/web/v2/subtitle', ep_id or aid, note='Downloading subtitles list',
|
||||
errnote='Unable to download subtitles list', query=filter_dict({
|
||||
'platform': 'web',
|
||||
'episode_id': ep_id,
|
||||
'aid': aid,
|
||||
}))
|
||||
subtitles = {}
|
||||
for sub in sub_json.get('subtitles') or []:
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_data = self._download_json(
|
||||
sub_url, ep_id, errnote='Unable to download subtitles', fatal=False,
|
||||
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||
if not sub_data:
|
||||
continue
|
||||
@@ -773,9 +812,14 @@ def _get_subtitles(self, ep_id):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _get_formats(self, ep_id):
|
||||
video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
|
||||
note='Downloading video formats', errnote='Unable to download video formats')
|
||||
def _get_formats(self, *, ep_id=None, aid=None):
|
||||
video_json = self._call_api(
|
||||
'/web/playurl', ep_id or aid, note='Downloading video formats',
|
||||
errnote='Unable to download video formats', query=filter_dict({
|
||||
'platform': 'web',
|
||||
'ep_id': ep_id,
|
||||
'aid': aid,
|
||||
}))
|
||||
video_json = video_json['playurl']
|
||||
formats = []
|
||||
for vid in video_json.get('video') or []:
|
||||
@@ -809,23 +853,19 @@ def _get_formats(self, ep_id):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _extract_ep_info(self, episode_data, ep_id):
|
||||
def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
|
||||
return {
|
||||
'id': ep_id,
|
||||
'title': episode_data.get('title_display') or episode_data['title'],
|
||||
'thumbnail': episode_data.get('cover'),
|
||||
'id': ep_id or aid,
|
||||
'title': video_data.get('title_display') or video_data.get('title'),
|
||||
'thumbnail': video_data.get('cover'),
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'^E(\d+)(?:$| - )', episode_data.get('title_display'), 'episode number', default=None)),
|
||||
'formats': self._get_formats(ep_id),
|
||||
'subtitles': self._get_subtitles(ep_id),
|
||||
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
|
||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid),
|
||||
'extractor_key': BiliIntlIE.ie_key(),
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
try:
|
||||
from Cryptodome.PublicKey import RSA
|
||||
from Cryptodome.Cipher import PKCS1_v1_5
|
||||
@@ -856,12 +896,9 @@ def _login(self):
|
||||
else:
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
|
||||
class BiliIntlIE(BiliIntlBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
|
||||
_TESTS = [{
|
||||
# Bstation page
|
||||
'url': 'https://www.bilibili.tv/en/play/34613/341736',
|
||||
@@ -896,24 +933,35 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.biliintl.com/en/play/34613/341736',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# User-generated content (as opposed to a series licensed from a studio)
|
||||
'url': 'https://bilibili.tv/en/video/2019955076',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No language in URL
|
||||
'url': 'https://www.bilibili.tv/video/2019955076',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
season_id, video_id = self._match_valid_url(url).groups()
|
||||
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||
video_id = ep_id or aid
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
# Bstation layout
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), video_id, fatal=False) or {}
|
||||
episode_data = traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
|
||||
video_data = (
|
||||
traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
|
||||
or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
|
||||
|
||||
if not episode_data:
|
||||
if season_id and not video_data:
|
||||
# Non-Bstation layout, read through episode list
|
||||
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
||||
episode_data = next(
|
||||
episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
|
||||
if str(episode.get('episode_id')) == video_id)
|
||||
return self._extract_ep_info(episode_data, video_id)
|
||||
video_data = traverse_obj(season_json,
|
||||
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id),
|
||||
expected_type=dict, get_all=False)
|
||||
return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
|
||||
|
||||
|
||||
class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
@@ -941,7 +989,7 @@ def _entries(self, series_id):
|
||||
series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
|
||||
for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]):
|
||||
episode_id = str(episode.get('episode_id'))
|
||||
yield self._extract_ep_info(episode, episode_id)
|
||||
yield self._extract_video_info(episode, ep_id=episode_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
@@ -175,7 +175,7 @@ def _extract_thumbnails(self, variants, base_url):
|
||||
|
||||
class BRMediathekIE(InfoExtractor):
|
||||
IE_DESC = 'Bayerischer Rundfunk Mediathek'
|
||||
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
|
||||
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?P<id>av:[0-9a-f]{24})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
|
||||
@@ -188,6 +188,9 @@ class BRMediathekIE(InfoExtractor):
|
||||
'timestamp': 1511942766,
|
||||
'upload_date': '20171129',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -29,9 +29,8 @@ def _real_extract(self, url):
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, 'video title'),
|
||||
'title': (self._og_search_title(webpage, default=None)
|
||||
or self._html_extract_title(webpage, 'video title')),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._rta_search(webpage),
|
||||
|
||||
@@ -54,7 +54,7 @@ def _real_extract(self, url):
|
||||
id = episode['id']
|
||||
title = (episode.get('title')
|
||||
or self._og_search_title(webpage, fatal=False)
|
||||
or self._html_search_regex('<title>(.*?)</title>', webpage, 'title'))
|
||||
or self._html_extract_title(webpage))
|
||||
url = episode['m3u8']
|
||||
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -245,10 +245,6 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'upload_date': '20200727',
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# Only available via new API endpoint
|
||||
@@ -264,24 +260,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'episode_number': 5,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
_CONTEXT_ID = 'R3595707040'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
auth_info = self._gigya_login({
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
|
||||
@@ -127,9 +127,9 @@ def _extract_player_init(self, player_init, display_id):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_search_meta('twitter:title', webpage, 'title', default=None)
|
||||
or self._html_extract_title(webpage))
|
||||
entries = [
|
||||
self._extract_player_init(player_init, display_id)
|
||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||
|
||||
@@ -77,21 +77,21 @@ class CBSIE(CBSBaseIE):
|
||||
(?:
|
||||
cbs:|
|
||||
https?://(?:www\.)?(?:
|
||||
cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
|
||||
cbs\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/|
|
||||
colbertlateshow\.com/(?:video|podcasts)/)
|
||||
)(?P<id>[\w-]+)'''
|
||||
|
||||
# All tests are blocked outside US
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'url': 'https://www.cbs.com/shows/video/xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R/',
|
||||
'info_dict': {
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'id': 'xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
'duration': 1495,
|
||||
'timestamp': 1385585425,
|
||||
'upload_date': '20131127',
|
||||
'title': 'Tough As Nails - Dreams Never Die',
|
||||
'description': 'md5:a3535a62531cdd52b0364248a2c1ae33',
|
||||
'duration': 2588,
|
||||
'timestamp': 1639015200,
|
||||
'upload_date': '20211209',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
@@ -99,14 +99,14 @@ class CBSIE(CBSBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
|
||||
'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
|
||||
'info_dict': {
|
||||
'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
|
||||
'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
|
||||
'timestamp': 1624507140,
|
||||
'description': 'md5:e01af24e95c74d55e8775aef86117b95',
|
||||
'id': 'sZH1MGgomIosZgxGJ1l263MFq16oMtW1',
|
||||
'title': 'The Late Show - 3/16/22 (Michael Buble, Rose Matafeo)',
|
||||
'timestamp': 1647488100,
|
||||
'description': 'md5:d0e6ec23c544b7fa8e39a8e6844d2439',
|
||||
'uploader': 'CBSI-NEW',
|
||||
'upload_date': '20210624',
|
||||
'upload_date': '20220317',
|
||||
},
|
||||
'params': {
|
||||
'ignore_no_formats_error': True,
|
||||
|
||||
@@ -54,8 +54,7 @@ def _real_extract(self, url):
|
||||
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
|
||||
webpage, 'kaltura partner_id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
|
||||
title = self._html_extract_title(webpage, 'video title')
|
||||
|
||||
select = self._search_regex(
|
||||
r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
compat_getpass,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_Pattern,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
@@ -41,7 +42,6 @@
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
@@ -49,6 +49,7 @@
|
||||
error_to_compat_str,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
format_field,
|
||||
@@ -138,6 +139,8 @@ class InfoExtractor(object):
|
||||
for HDS - URL of the F4M manifest,
|
||||
for DASH - URL of the MPD manifest,
|
||||
for MSS - URL of the ISM manifest.
|
||||
* manifest_stream_number (For internal use only)
|
||||
The index of the stream in the manifest file
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@@ -214,7 +217,7 @@ class InfoExtractor(object):
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* has_drm The format has DRM and cannot be downloaded. Boolean
|
||||
* downloader_options A dictionary of downloader options as
|
||||
described in FileDownloader
|
||||
described in FileDownloader (For internal use only)
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
@@ -248,14 +251,14 @@ class InfoExtractor(object):
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
upload_date: Video upload date in UTC (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp
|
||||
release_timestamp: UNIX timestamp of the moment the video was released.
|
||||
If it is not clear whether to use timestamp or this, use the former
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
release_date: The date (YYYYMMDD) when the video was released in UTC.
|
||||
If not explicitly set, calculated from release_timestamp
|
||||
modified_timestamp: UNIX timestamp of the moment the video was last modified.
|
||||
modified_date: The date (YYYYMMDD) when the video was last modified.
|
||||
modified_date: The date (YYYYMMDD) when the video was last modified in UTC.
|
||||
If not explicitly set, calculated from modified_timestamp
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
@@ -432,7 +435,15 @@ class InfoExtractor(object):
|
||||
|
||||
Subclasses may also override suitable() if necessary, but ensure the function
|
||||
signature is preserved and that this function imports everything it needs
|
||||
(except other extractors), so that lazy_extractors works correctly
|
||||
(except other extractors), so that lazy_extractors works correctly.
|
||||
|
||||
To support username + password (or netrc) login, the extractor must define a
|
||||
_NETRC_MACHINE and re-define _perform_login(username, password) and
|
||||
(optionally) _initialize_pre_login() methods. The _perform_login method will
|
||||
be called between _initialize_pre_login and _real_initialize if credentials
|
||||
are passed by the user. In cases where it is necessary to have the login
|
||||
process as part of the extraction rather than initialization, _perform_login
|
||||
can be left undefined.
|
||||
|
||||
_GEO_BYPASS attribute may be set to False in order to disable
|
||||
geo restriction bypass mechanisms for a particular extractor.
|
||||
@@ -460,9 +471,11 @@ class InfoExtractor(object):
|
||||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
_NETRC_MACHINE = None
|
||||
IE_DESC = None
|
||||
|
||||
_LOGIN_HINTS = {
|
||||
'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials',
|
||||
'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
|
||||
@@ -512,6 +525,10 @@ def working(cls):
|
||||
"""Getter method for _WORKING."""
|
||||
return cls._WORKING
|
||||
|
||||
@classmethod
def supports_login(cls):
    """Return True when the extractor class has a truthy _NETRC_MACHINE."""
    return True if cls._NETRC_MACHINE else False
|
||||
|
||||
def initialize(self):
|
||||
"""Initializes an instance (authentication, etc)."""
|
||||
self._printed_messages = set()
|
||||
@@ -520,6 +537,13 @@ def initialize(self):
|
||||
'ip_blocks': self._GEO_IP_BLOCKS,
|
||||
})
|
||||
if not self._ready:
|
||||
self._initialize_pre_login()
|
||||
if self.supports_login():
|
||||
username, password = self._get_login_info()
|
||||
if username:
|
||||
self._perform_login(username, password)
|
||||
elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
|
||||
self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}')
|
||||
self._real_initialize()
|
||||
self._ready = True
|
||||
|
||||
@@ -665,6 +689,14 @@ def set_downloader(self, downloader):
|
||||
"""Sets a YoutubeDL instance as the downloader for this IE."""
|
||||
self._downloader = downloader
|
||||
|
||||
def _initialize_pre_login(self):
|
||||
""" Intialization before login. Redefine in subclasses."""
|
||||
pass
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
""" Login with username and password. Redefine in subclasses."""
|
||||
pass
|
||||
|
||||
def _real_initialize(self):
|
||||
"""Real initialization process. Redefine in subclasses."""
|
||||
pass
|
||||
@@ -1005,7 +1037,7 @@ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
return json.loads(json_string)
|
||||
return json.loads(json_string, strict=False)
|
||||
except ValueError as ve:
|
||||
errmsg = '%s: Failed to parse JSON ' % video_id
|
||||
if fatal:
|
||||
@@ -1098,12 +1130,15 @@ def report_login(self):
|
||||
|
||||
def raise_login_required(
|
||||
self, msg='This video is only available for registered users',
|
||||
metadata_available=False, method='any'):
|
||||
metadata_available=False, method=NO_DEFAULT):
|
||||
if metadata_available and (
|
||||
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
|
||||
self.report_warning(msg)
|
||||
return
|
||||
if method is NO_DEFAULT:
|
||||
method = 'any' if self.supports_login() else 'cookies'
|
||||
if method is not None:
|
||||
assert method in self._LOGIN_HINTS, 'Invalid login method'
|
||||
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
@@ -1168,7 +1203,9 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
|
||||
In case of failure return a default value or raise a WARNING or a
|
||||
RegexNotFoundError, depending on fatal, specifying the field name.
|
||||
"""
|
||||
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
||||
if string is None:
|
||||
mobj = None
|
||||
elif isinstance(pattern, (str, compat_Pattern)):
|
||||
mobj = re.search(pattern, string, flags)
|
||||
else:
|
||||
for p in pattern:
|
||||
@@ -1264,8 +1301,8 @@ def _get_tfa_info(self, note='two-factor verification code'):
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
|
||||
% {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
template % (property_re, content_re),
|
||||
@@ -1296,9 +1333,8 @@ def _og_search_thumbnail(self, html, **kargs):
|
||||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
|
||||
def _og_search_title(self, html, **kargs):
|
||||
kargs.setdefault('fatal', False)
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
def _og_search_title(self, html, *, fatal=False, **kargs):
|
||||
return self._og_search_property('title', html, fatal=fatal, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
regexes = self._og_regexes('video') + self._og_regexes('video:url')
|
||||
@@ -1309,9 +1345,8 @@ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
def _og_search_url(self, html, **kargs):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_extract_title(self, html, name, **kwargs):
|
||||
return self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', html, name, **kwargs)
|
||||
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
|
||||
return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
name = variadic(name)
|
||||
@@ -1556,7 +1591,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
|
||||
break
|
||||
traverse_json_ld(json_ld)
|
||||
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
return filter_dict(info)
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
|
||||
return self._parse_json(
|
||||
@@ -3651,11 +3686,11 @@ def _get_comments(self, *args, **kwargs):
|
||||
|
||||
@staticmethod
|
||||
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||
""" Merge subtitle items for one language. Items with duplicated URLs
|
||||
""" Merge subtitle items for one language. Items with duplicated URLs/data
|
||||
will be dropped. """
|
||||
list1_urls = set([item['url'] for item in subtitle_list1])
|
||||
list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1)
|
||||
ret = list(subtitle_list1)
|
||||
ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
|
||||
ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
@@ -3680,9 +3715,8 @@ def _get_automatic_captions(self, *args, **kwargs):
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if not self.get_param('mark_watched', False):
|
||||
return
|
||||
if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None
|
||||
or self.get_param('cookiefile')
|
||||
or self.get_param('cookiesfrombrowser')):
|
||||
if (self.supports_login() and self._get_login_info()[0] is not None
|
||||
or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')):
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
|
||||
71
yt_dlp/extractor/craftsy.py
Normal file
71
yt_dlp/extractor/craftsy.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
get_element_by_id,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class CraftsyIE(InfoExtractor):
    # Extractor for craftsy.com class pages; each class is returned as a
    # playlist of Brightcove player URLs.
    # The dots of the host name are escaped so that only the literal
    # "www.craftsy.com" matches (an unescaped "." accepts any character).
    _VALID_URL = r'https?://www\.craftsy\.com/class/(?P<id>[a-z0-9_-]+)/'
    _TESTS = [{
        'url': 'https://www.craftsy.com/class/the-midnight-quilt-show-season-5/',
        'info_dict': {
            'id': 'the-midnight-quilt-show-season-5',
            'title': 'The Midnight Quilt Show Season 5',
            'description': 'md5:113eda818e985d1a566625fb2f833b7a',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.craftsy.com/class/sew-your-own-designer-handbag/',
        'info_dict': {
            'id': 'sew-your-own-designer-handbag',
            'title': 'Sew Your Own Designer Handbag',
            'description': 'md5:8270d0ef5427d3c895a27351aeaac276',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.craftsy.com/class/all-access-estes-park-wool-market/',
        'info_dict': {
            'id': 'all-access-estes-park-wool-market',
            'title': 'All Access: Estes Park Wool Market',
            'description': 'md5:aded1bd8d38ae2fae4dae936c0ae01e7',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist of the class preview and, if accessible, its lessons.

        The page's ``class_video_player_vars`` JavaScript object (found inside
        the element with id ``vidstore-classes_class-video-player-js-extra``)
        is parsed as JSON after conversion with ``js_to_json``. Lessons are
        only added when ``is_free`` or ``user_has_access`` is truthy in it.
        Raises KeyError if lessons are accessible but ``lessons`` is missing.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(self._search_regex(
            r'class_video_player_vars\s*=\s*({.*})\s*;',
            get_element_by_id('vidstore-classes_class-video-player-js-extra', webpage),
            'video data'), video_id, transform_source=js_to_json)

        # Brightcove account ID used to build every player URL below.
        account_id = traverse_obj(video_data, ('video_player', 'bc_account_id'))

        entries = []
        class_preview = traverse_obj(video_data, ('video_player', 'class_preview'))
        if class_preview:
            v_id = class_preview.get('video_id')
            entries.append(self.url_result(
                f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={v_id}',
                BrightcoveNewIE, v_id, class_preview.get('title')))

        # dict_get is given both keys; lessons are listed when it returns a
        # truthy value for them.
        if dict_get(video_data, ('is_free', 'user_has_access')):
            entries += [
                self.url_result(
                    f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}',
                    BrightcoveNewIE, lesson['video_id'], lesson.get('title'))
                for lesson in video_data['lessons']]

        return self.playlist_result(
            entries, video_id, video_data.get('class_title'),
            self._html_search_meta(('og:description', 'description'), webpage, default=None))
|
||||
@@ -9,7 +9,7 @@
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from .vrv import VRVBaseIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_Element,
|
||||
@@ -57,10 +57,7 @@ def _call_rpc_api(self, method, video_id, note=None, data=None):
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
def _perform_login(self, username, password):
|
||||
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
|
||||
return
|
||||
|
||||
@@ -89,8 +86,21 @@ def _login(self):
|
||||
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
# Beta-specific, but needed for redirects
|
||||
def _get_beta_embedded_json(self, webpage, display_id):
    """Return the `(initial_state, app_config)` objects embedded in a beta page.

    Each object is located in `webpage` through its `__INITIAL_STATE__` /
    `__APP_CONFIG__` assignment and parsed with `_parse_json`, passing
    `display_id` along as the id argument.
    """
    state_source = self._search_regex(
        r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state')
    initial_state = self._parse_json(state_source, display_id)

    config_source = self._search_regex(
        r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config')
    app_config = self._parse_json(config_source, display_id)

    return initial_state, app_config
|
||||
|
||||
def _redirect_to_beta(self, webpage, iekey, video_id):
    """Hand a beta page that was served from a non-beta URL over to extractor `iekey`.

    Raises ExtractorError when no `etp_rt` cookie is set for the login URL.
    The target URL is the app config's `baseSiteUrl` followed by the current
    router pathname found in the page's initial state.
    """
    session_cookie = self._get_cookies(self._LOGIN_URL).get('etp_rt')
    if not session_cookie:
        raise ExtractorError('Received a beta page from non-beta url when not logged in.')

    initial_state, app_config = self._get_beta_embedded_json(webpage, video_id)
    # Keep the lookup order: site root first, then the router pathname
    site_root = app_config['baseSiteUrl']
    pathname = initial_state['router']['locations']['current']['pathname']
    url = site_root + pathname

    self.to_screen(f'{video_id}: Redirected to beta site - {url}')
    return self.url_result(f'{url}', iekey, video_id)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
@@ -106,7 +116,7 @@ def _add_skip_wall(url):
|
||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
@@ -412,6 +422,8 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(webpage_url), video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
if re.search(r'<div id="preload-data">', webpage):
|
||||
return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id)
|
||||
note_m = self._html_search_regex(
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
@@ -676,6 +688,8 @@ def _real_extract(self, url):
|
||||
# https:// gives a 403, but http:// does not
|
||||
self._add_skip_wall(url).replace('https://', 'http://'), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
if re.search(r'<div id="preload-data">', webpage):
|
||||
return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id)
|
||||
title = self._html_search_meta('name', webpage, default=None)
|
||||
|
||||
episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
|
||||
@@ -698,9 +712,56 @@ def _real_extract(self, url):
|
||||
}
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
    """Helpers shared by the Crunchyroll beta extractors."""
    # Cached (api_domain, bucket, query params) tuple, stored on the class itself
    params = None

    def _get_params(self, lang):
        """Return `(api_domain, bucket, params)` for CMS requests, computing it on first use.

        The values are derived from the beta main page for `lang`, a token
        request using the `etp_rt_cookie` grant, and the signed policy response.
        """
        if CrunchyrollBetaBaseIE.params:
            return CrunchyrollBetaBaseIE.params

        main_page = self._download_webpage(
            f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page')
        initial_state, app_config = self._get_beta_embedded_json(main_page, None)

        api_domain = app_config['cxApiParams']['apiDomain']
        client_id = app_config['cxApiParams']['accountAuthClientId']
        # HTTP basic credentials made of the client id and an empty password
        basic_token = base64.b64encode(('%s:' % client_id).encode('ascii')).decode('ascii')

        auth_headers = {
            'Authorization': 'Basic ' + basic_token
        }
        auth_response = self._download_json(
            f'{api_domain}/auth/v1/token', None, note='Authenticating with cookie',
            headers=auth_headers, data='grant_type=etp_rt_cookie'.encode('ascii'))

        policy_headers = {
            'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
        }
        policy_response = self._download_json(
            f'{api_domain}/index/v2', None, note='Retrieving signed policy',
            headers=policy_headers)

        bucket = policy_response['cms']['bucket']
        params = {
            'Policy': policy_response['cms']['policy'],
            'Signature': policy_response['cms']['signature'],
            'Key-Pair-Id': policy_response['cms']['key_pair_id']
        }
        locale = traverse_obj(initial_state, ('localization', 'locale'))
        if locale:
            params['locale'] = locale

        CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBetaBaseIE.params

    def _redirect_from_beta(self, url, lang, internal_id, display_id, is_episode, iekey):
        """Return a url_result pointing at the non-beta page matching beta `url`.

        The series slug (and, for episodes, the numeric part of `external_id`)
        is read from the beta page's embedded initial state; runs of two or
        more dashes in the slug are collapsed into a single dash.
        """
        page = self._download_webpage(url, display_id)
        initial_state, _ = self._get_beta_embedded_json(page, display_id)
        content_data = initial_state['content']['byId'][internal_id]

        if is_episode:
            video_id = content_data['external_id'].split('.')[1]
            raw_slug = content_data['episode_metadata']['series_slug_title']
        else:
            raw_slug = content_data['slug_title']
        series_id = re.sub(r'-{2,}', '-', raw_slug)

        target = f'https://www.crunchyroll.com/{lang}{series_id}'
        if is_episode:
            target += f'/{display_id}-{video_id}'

        self.to_screen(f'{display_id}: Not logged in. Redirecting to non-beta site - {target}')
        return self.url_result(target, iekey, display_id)
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
|
||||
IE_NAME = 'crunchyroll:beta'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'info_dict': {
|
||||
@@ -711,51 +772,49 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
'uploader': 'Toei Animation',
|
||||
'title': 'World Trigger Episode 73 – To the Future',
|
||||
'upload_date': '20160402',
|
||||
'episode_number': 73,
|
||||
'series': 'World Trigger',
|
||||
'average_rating': 4.9,
|
||||
'episode': 'To the Future',
|
||||
'season': 'World Trigger',
|
||||
'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/c870dedca1a83137c2d3d144984155ed1459527119_main.jpg',
|
||||
'season_number': 1,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Unable to download XML']
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/watch/GYK53DMPR/wicked-lord-shingan-reborn',
|
||||
'info_dict': {
|
||||
'id': '648781',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1389173400,
|
||||
'series': 'Love, Chunibyo & Other Delusions - Heart Throb -',
|
||||
'description': 'md5:5579d1a0355cc618558ba23d27067a62',
|
||||
'uploader': 'TBS',
|
||||
'episode': 'Wicked Lord Shingan... Reborn',
|
||||
'average_rating': 4.9,
|
||||
'season': 'Love, Chunibyo & Other Delusions - Heart Throb -',
|
||||
'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/2ba0384e225a5370d5f0ee9496d91ea51389046521_main.jpg',
|
||||
'title': 'Love, Chunibyo & Other Delusions - Heart Throb - Episode 1 – Wicked Lord Shingan... Reborn',
|
||||
'season_number': 2,
|
||||
'upload_date': '20140108',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Unable to download XML']
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_state = self._parse_json(
|
||||
self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
|
||||
display_id)
|
||||
episode_data = initial_state['content']['byId'][internal_id]
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
video_id = episode_data['external_id'].split('.')[1]
|
||||
series_id = episode_data['episode_metadata']['series_slug_title']
|
||||
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
|
||||
CrunchyrollIE.ie_key(), video_id)
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key())
|
||||
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
app_config = self._parse_json(
|
||||
self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
|
||||
display_id)
|
||||
client_id = app_config['cxApiParams']['accountAuthClientId']
|
||||
api_domain = app_config['cxApiParams']['apiDomain']
|
||||
basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii')
|
||||
auth_response = self._download_json(
|
||||
f'{api_domain}/auth/v1/token', display_id,
|
||||
note='Authenticating with cookie',
|
||||
headers={
|
||||
'Authorization': 'Basic ' + basic_token
|
||||
}, data='grant_type=etp_rt_cookie'.encode('ascii'))
|
||||
policy_response = self._download_json(
|
||||
f'{api_domain}/index/v2', display_id,
|
||||
note='Retrieving signed policy',
|
||||
headers={
|
||||
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
|
||||
})
|
||||
bucket = policy_response['cms']['bucket']
|
||||
params = {
|
||||
'Policy': policy_response['cms']['policy'],
|
||||
'Signature': policy_response['cms']['signature'],
|
||||
'Key-Pair-Id': policy_response['cms']['key_pair_id']
|
||||
}
|
||||
locale = traverse_obj(initial_state, ('localization', 'locale'))
|
||||
if locale:
|
||||
params['locale'] = locale
|
||||
episode_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
|
||||
note='Retrieving episode metadata',
|
||||
@@ -833,9 +892,9 @@ def _real_extract(self, url):
|
||||
}
|
||||
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist:beta'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'info_dict': {
|
||||
@@ -843,12 +902,57 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||
'title': 'Girl Friend BETA',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/series/GYJQV73V6/love-chunibyo--other-delusions---heart-throb--',
|
||||
'info_dict': {
|
||||
'id': 'love-chunibyo-other-delusions-heart-throb-',
|
||||
'title': 'Love, Chunibyo & Other Delusions - Heart Throb -',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, series_id = self._match_valid_url(url).group('lang', 'id')
|
||||
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}',
|
||||
CrunchyrollShowPlaylistIE.ie_key(), series_id)
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key())
|
||||
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
series_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
|
||||
note='Retrieving series metadata', query=params)
|
||||
|
||||
seasons_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
|
||||
note='Retrieving season list', query=params)
|
||||
|
||||
def entries():
|
||||
for season in seasons_response['items']:
|
||||
episodes_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
|
||||
note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
|
||||
for episode in episodes_response['items']:
|
||||
episode_id = episode['id']
|
||||
episode_display_id = episode['slug_title']
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
|
||||
'ie_key': CrunchyrollBetaIE.ie_key(),
|
||||
'id': episode_id,
|
||||
'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
|
||||
'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
|
||||
'duration': float_or_none(episode.get('duration_ms'), 1000),
|
||||
'series': episode.get('series_title'),
|
||||
'series_id': episode.get('series_id'),
|
||||
'season': episode.get('season_title'),
|
||||
'season_id': episode.get('season_id'),
|
||||
'season_number': episode.get('season_number'),
|
||||
'episode': episode.get('title'),
|
||||
'episode_number': episode.get('sequence_number')
|
||||
}
|
||||
|
||||
return self.playlist_result(entries(), internal_id, series_response.get('title'))
|
||||
|
||||
@@ -278,7 +278,7 @@ def _real_extract(self, url):
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
|
||||
or self._html_extract_title(webpage, 'video title'))
|
||||
description = (self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, 'description', default=None))
|
||||
|
||||
|
||||
@@ -33,14 +33,11 @@ def _call_api(self, path, video_id, query=None):
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
def _perform_login(self, username, password):
|
||||
result = self._download_json(
|
||||
'https://api.curiositystream.com/v1/login', None,
|
||||
note='Logging in', data=urlencode_postdata({
|
||||
'email': email,
|
||||
'email': username,
|
||||
'password': password,
|
||||
}))
|
||||
self._handle_errors(result)
|
||||
|
||||
146
yt_dlp/extractor/cybrary.py
Normal file
146
yt_dlp/extractor/cybrary.py
Normal file
@@ -0,0 +1,146 @@
|
||||
# coding: utf-8
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata
|
||||
)
|
||||
|
||||
|
||||
class CybraryBaseIE(InfoExtractor):
    """Login and API plumbing shared by the Cybrary extractors."""
    _API_KEY = 'AIzaSyCX9ru6j70PX2My1Eq6Q1zoMAhuTdXlzSw'
    # URL templates; `{}` is filled with the item id given to _call_api
    _ENDPOINTS = {
        'course': 'https://app.cybrary.it/courses/api/catalog/browse/course/{}',
        'course_enrollment': 'https://app.cybrary.it/courses/api/catalog/{}/enrollment',
        'enrollment': 'https://app.cybrary.it/courses/api/enrollment/{}',
        'launch': 'https://app.cybrary.it/courses/api/catalog/{}/launch',
        'vimeo_oembed': 'https://vimeo.com/api/oembed.json?url=https://vimeo.com/{}',
    }
    _NETRC_MACHINE = 'cybrary'
    # Bearer token obtained by _perform_login; stored on the base class
    _TOKEN = None

    def _perform_login(self, username, password):
        """Sign in with email and password and remember the returned `idToken`."""
        login_url = f'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?key={self._API_KEY}'
        form = urlencode_postdata({'email': username, 'password': password, 'returnSecureToken': True})
        response = self._download_json(login_url, None, data=form, note='Logging in')
        CybraryBaseIE._TOKEN = response['idToken']

    def _real_initialize(self):
        """Require password login when no token has been stored."""
        if self._TOKEN:
            return
        self.raise_login_required(method='password')

    def _call_api(self, endpoint, item_id):
        """Download the JSON of `endpoint` for `item_id`, sending the bearer token."""
        api_url = self._ENDPOINTS[endpoint].format(item_id)
        return self._download_json(
            api_url, item_id,
            note=f'Downloading {endpoint} JSON metadata',
            headers={'Authorization': f'Bearer {self._TOKEN}'})

    def _get_vimeo_id(self, activity_id):
        """Return the Vimeo video id for an activity from its launch data.

        The id is taken from the player URL when the launch data has a `url`,
        otherwise from the first `videoId` under `vendor_data` -> `content`.
        """
        launch_api = self._call_api('launch', activity_id)
        player_url = launch_api.get('url')

        if not player_url:
            return traverse_obj(launch_api, ('vendor_data', 'content', ..., 'videoId'), get_all=False)
        return self._search_regex(
            r'https?://player\.vimeo\.com/video/(?P<vimeo_id>[0-9]+)', player_url, 'vimeo_id')
|
||||
|
||||
|
||||
class CybraryIE(CybraryBaseIE):
    """Extract a single Cybrary activity as a url_transparent Vimeo player result."""
    # Dots are escaped so that only the literal host "app.cybrary.it" matches
    _VALID_URL = r'https?://app\.cybrary\.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
        'md5': '9ae12d37e555cb2ed554223a71a701d0',
        'info_dict': {
            'id': '646609770',
            'ext': 'mp4',
            'title': 'Getting Started',
            'thumbnail': 'https://i.vimeocdn.com/video/1301817996-76a268f0c56cff18a5cecbbdc44131eb9dda0c80eb0b3a036_1280',
            'series_id': '63111',
            'uploader_url': 'https://vimeo.com/user30867300',
            'duration': 88,
            'uploader_id': 'user30867300',
            'series': 'Cybrary Orientation',
            'uploader': 'Cybrary',
            'chapter': 'Cybrary Orientation Series',
            'chapter_id': '63110'
        },
        'expected_warnings': ['No authenticators for vimeo']
    }, {
        'url': 'https://app.cybrary.it/immersive/12747143/activity/52686',
        'md5': '62f26547dccc59c44363e2a13d4ad08d',
        'info_dict': {
            'id': '445638073',
            'ext': 'mp4',
            'title': 'Azure Virtual Network IP Addressing',
            'thumbnail': 'https://i.vimeocdn.com/video/936667051-1647ace66c627d4a2382185e0dae8deb830309bfddd53f8b2367b2f91e92ed0e-d_1280',
            'series_id': '52733',
            'uploader_url': 'https://vimeo.com/user30867300',
            'duration': 426,
            'uploader_id': 'user30867300',
            'series': 'AZ-500: Microsoft Azure Security Technologies',
            'uploader': 'Cybrary',
            'chapter': 'Implement Network Security',
            'chapter_id': '52693'
        },
        'expected_warnings': ['No authenticators for vimeo']
    }]

    def _real_extract(self, url):
        """Resolve the activity named in `url` to its Vimeo video plus course metadata.

        Raises ExtractorError when the activity is not part of the enrollment
        or is not one of the video activity types.
        """
        activity_id, enrollment_id = self._match_valid_url(url).group('id', 'enrollment')
        course = self._call_api('enrollment', enrollment_id)['content']
        # The URL carries the id as text while the API data is compared as an int
        wanted_id = int(activity_id)
        activity = traverse_obj(
            course, ('learning_modules', ..., 'activities', lambda _, v: wanted_id == v['id']),
            get_all=False)

        # Fail with an extractor error rather than an AttributeError on None
        if not activity:
            raise ExtractorError(f'Unable to find activity {activity_id} in enrollment {enrollment_id}')
        if activity.get('type') not in ['Video Activity', 'Lesson Activity']:
            raise ExtractorError('The activity is not a video', expected=True)

        # The `or []` fallback belongs to the traversal result (not to the path
        # tuple), so modules without any activity ids are skipped safely
        module = next((m for m in course.get('learning_modules') or []
                       if wanted_id in (traverse_obj(m, ('activities', ..., 'id')) or [])), {})

        vimeo_id = self._get_vimeo_id(activity_id)

        return {
            '_type': 'url_transparent',
            'series': traverse_obj(course, ('content_description', 'title')),
            'series_id': str_or_none(traverse_obj(course, ('content_description', 'id'))),
            'id': vimeo_id,
            'chapter': module.get('title'),
            'chapter_id': str_or_none(module.get('id')),
            'title': activity.get('title'),
            'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
        }
|
||||
|
||||
|
||||
class CybraryCourseIE(CybraryBaseIE):
    """Extract a Cybrary course page as a playlist of its activities."""
    # Accept http and https like the sibling extractor, and match the host literally
    _VALID_URL = r'https?://app\.cybrary\.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    # The playlist id is passed through str_or_none below, so the expected ids are strings
    _TESTS = [{
        'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
        'info_dict': {
            'id': '898',
            'title': 'AZ-500: Microsoft Azure Security Technologies',
            'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
        },
        'playlist_count': 59
    }, {
        'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
        'info_dict': {
            'id': '1245',
            'title': 'Cybrary Orientation',
            'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
        },
        'playlist_count': 4
    }]

    def _real_extract(self, url):
        """Return a playlist with one immersive activity URL per course activity."""
        course_id = self._match_id(url)
        course = self._call_api('course', course_id)
        enrollment_info = self._call_api('course_enrollment', course['id'])

        # An empty traversal result is treated as "no activities"
        activities = traverse_obj(
            course, ('content_item', 'learning_modules', ..., 'activities', ...)) or []
        entries = [self.url_result(
            f'https://app.cybrary.it/immersive/{enrollment_info["id"]}/activity/{activity["id"]}')
            for activity in activities]

        return self.playlist_result(
            entries,
            traverse_obj(course, ('content_item', 'id'), expected_type=str_or_none),
            course.get('title'), course.get('short_description'))
|
||||
@@ -4,30 +4,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DaftsexIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://daftsex.com/watch/-35370899_456246186',
|
||||
'md5': 'd95135e6cea2d905bea20dbe82cda64a',
|
||||
'info_dict': {
|
||||
'id': '-35370899_456246186',
|
||||
'ext': 'mp4',
|
||||
'title': 'just relaxing',
|
||||
'description': 'just relaxing - Watch video Watch video in high quality',
|
||||
'upload_date': '20201113',
|
||||
'timestamp': 1605261911,
|
||||
'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://daftsex.com/watch/-156601359_456242791',
|
||||
'info_dict': {
|
||||
'id': '-156601359_456242791',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skye Blue - Dinner And A Show',
|
||||
'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality',
|
||||
'upload_date': '20200916',
|
||||
'timestamp': 1600250735,
|
||||
'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = get_elements_by_class('heading', webpage)[-1]
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
|
||||
description = self._html_search_meta('description', webpage, 'Description', default=None)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
|
||||
webpage, 'duration', fatal=False))
|
||||
@@ -52,28 +72,75 @@ def _real_extract(self, url):
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8')
|
||||
|
||||
cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {}
|
||||
if cdn_files:
|
||||
formats = []
|
||||
for format_id, format_data in cdn_files.items():
|
||||
ext, height = format_id.split('_')
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}',
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')),
|
||||
'timestamp': timestamp,
|
||||
'view_count': views,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
item = self._download_json(
|
||||
f'{server_domain}/method/video.get/{video_id}', video_id,
|
||||
headers={'Referer': url}, query={
|
||||
'token': video_params['video']['access_token'],
|
||||
'videos': video_id,
|
||||
'ckey': video_params['c_key'],
|
||||
'credentials': video_params['video']['credentials'],
|
||||
})['response']['items'][0]
|
||||
|
||||
formats = []
|
||||
for format_id, format_data in video_params['video']['cdn_files'].items():
|
||||
ext, height = format_id.split('_')
|
||||
extra_quality_data = format_data.split('.')[-1]
|
||||
url = f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={extra_quality_data}'
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
for f_id, f_url in item.get('files', {}).items():
|
||||
if f_id == 'external':
|
||||
return self.url_result(f_url)
|
||||
ext, height = f_id.split('_')
|
||||
height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height))
|
||||
if height_extra_key:
|
||||
formats.append({
|
||||
'format_id': f'{height}p',
|
||||
'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = try_get(video_params,
|
||||
lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8'))
|
||||
thumbnails = []
|
||||
for k, v in item.items():
|
||||
if k.startswith('photo_') and v:
|
||||
width = k.replace('photo_', '')
|
||||
thumbnails.append({
|
||||
'id': width,
|
||||
'url': v,
|
||||
'width': int_or_none(width),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'comment_count': int_or_none(item.get('comments')),
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'view_count': views,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
@@ -94,10 +94,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
)
|
||||
/(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_TESTS = [{
|
||||
@@ -115,6 +115,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
'md5': 'e2f9717c6604773f963f069ca53a07f8',
|
||||
'info_dict': {
|
||||
'id': 'x89eyek',
|
||||
'ext': 'mp4',
|
||||
'title': "En quête d'esprit du 27/03/2022",
|
||||
'description': 'md5:66542b9f4df2eb23f314fc097488e553',
|
||||
'duration': 2756,
|
||||
'timestamp': 1648383669,
|
||||
'upload_date': '20220327',
|
||||
'uploader': 'CNEWS',
|
||||
'uploader_id': 'x24vth',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': 'https://s2.dmcdn.net/v/Tncwi1YGKdvFbDuDY/x1080',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
|
||||
@@ -45,10 +45,7 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
self.raise_login_required()
|
||||
def _perform_login(self, username, password):
|
||||
token_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
@@ -78,7 +75,8 @@ def _login(self):
|
||||
self.raise_login_required(msg='Login info incorrect')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
if not self._ACCESS_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
def _entries(self, items, language, **kwargs):
|
||||
for item in items:
|
||||
|
||||
@@ -123,7 +123,7 @@ def _real_extract(self, url):
|
||||
self._login(display_id)
|
||||
webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
|
||||
finally:
|
||||
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out')
|
||||
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
|
||||
|
||||
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@@ -139,7 +139,7 @@ def _real_extract(self, url):
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': VHXEmbedIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'),
|
||||
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage, fatal=False),
|
||||
|
||||
@@ -26,7 +26,7 @@ def _extract_video(self, data, video_id):
|
||||
duration = None
|
||||
for entry in data.get('media'):
|
||||
if entry.get('id') == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
entry['url'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
duration = int_or_none(entry.get('duration'))
|
||||
@@ -48,6 +48,7 @@ def get_insight(kind):
|
||||
'view_count': get_insight('view'),
|
||||
'like_count': get_insight('like'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,30 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
base_url,
|
||||
ExtractorError,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class ElonetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# m3u8 with subtitles
|
||||
'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
|
||||
'md5': '8efc954b96c543711707f87de757caea',
|
||||
'info_dict': {
|
||||
'id': '107867',
|
||||
'ext': 'mp4',
|
||||
'title': 'Valkoinen peura',
|
||||
'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
|
||||
'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
|
||||
'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_107867.+',
|
||||
'description': 'md5:bded4201c9677fab10854884fe8f7312',
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}, {
|
||||
# DASH with subtitles
|
||||
'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
|
||||
@@ -32,58 +24,45 @@ class ElonetIE(InfoExtractor):
|
||||
'id': '116539',
|
||||
'ext': 'mp4',
|
||||
'title': 'Minulla on tiikeri',
|
||||
'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
|
||||
'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
|
||||
}
|
||||
'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_116539.+',
|
||||
'description': 'md5:5ab72b3fe76d3414e46cc8f277104419',
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}, {
|
||||
# Page with multiple videos, download the main one
|
||||
'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_117396',
|
||||
'info_dict': {
|
||||
'id': '117396',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sampo',
|
||||
'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_117396.+',
|
||||
'description': 'md5:ec69572a5b054d0ecafe8086b1fa96f7',
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<meta .*property="og:title" .*content="(.+?)"', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<meta .*property="og:description" .*content="(.+?)"', webpage, 'description')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<meta .*property="og:image" .*content="(.+?)"', webpage, 'thumbnail')
|
||||
src = self._parse_json(self._html_search_regex(
|
||||
r'id=\'video-data\'[^>]+data-video-sources="([^"]+)"', webpage, 'json'), video_id)[0]['src']
|
||||
ext = determine_ext(src)
|
||||
|
||||
json_s = self._html_search_regex(
|
||||
r'data-video-sources="(.+?)"', webpage, 'json')
|
||||
src = try_get(
|
||||
self._parse_json(json_s, video_id),
|
||||
lambda x: x[0]["src"], compat_str)
|
||||
formats = []
|
||||
subtitles = {}
|
||||
if re.search(r'\.m3u8\??', src):
|
||||
res = self._download_webpage_handle(
|
||||
# elonet servers have certificate problems
|
||||
src.replace('https:', 'http:'), video_id,
|
||||
note='Downloading m3u8 information',
|
||||
errnote='Failed to download m3u8 information')
|
||||
if res:
|
||||
doc, urlh = res
|
||||
url = urlh.geturl()
|
||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, url)
|
||||
for f in formats:
|
||||
f['ext'] = 'mp4'
|
||||
elif re.search(r'\.mpd\??', src):
|
||||
res = self._download_xml_handle(
|
||||
src, video_id,
|
||||
note='Downloading MPD manifest',
|
||||
errnote='Failed to download MPD manifest')
|
||||
if res:
|
||||
doc, urlh = res
|
||||
url = base_url(urlh.geturl())
|
||||
formats, subtitles = self._parse_mpd_formats_and_subtitles(doc, mpd_base_url=url)
|
||||
if ext == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(src, video_id, fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(src, video_id, fatal=False)
|
||||
else:
|
||||
raise ExtractorError("Unknown streaming format")
|
||||
formats, subtitles = [], {}
|
||||
self.raise_no_formats(f'Unknown streaming format {ext}')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -39,11 +39,7 @@ class EroProfileIE(InfoExtractor):
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
query = compat_urllib_parse_urlencode({
|
||||
'username': username,
|
||||
'password': password,
|
||||
@@ -62,9 +58,6 @@ def _login(self):
|
||||
r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
|
||||
self._download_webpage(redirect_url, None, False)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
from .afreecatv import (
|
||||
AfreecaTVIE,
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
)
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
@@ -122,6 +123,10 @@
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .banbye import (
|
||||
BanByeIE,
|
||||
BanByeChannelIE,
|
||||
)
|
||||
from .bandaichannel import BandaiChannelIE
|
||||
from .bandcamp import (
|
||||
BandcampIE,
|
||||
@@ -318,6 +323,7 @@
|
||||
from .cozytv import CozyTVIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .craftsy import CraftsyIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crowdbunker import (
|
||||
CrowdBunkerIE,
|
||||
@@ -340,6 +346,10 @@
|
||||
CuriosityStreamSeriesIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .cybrary import (
|
||||
CybraryIE,
|
||||
CybraryCourseIE
|
||||
)
|
||||
from .daftsex import DaftsexIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@@ -622,6 +632,7 @@
|
||||
HSEProductIE,
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huya import HuyaLiveIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hungama import (
|
||||
HungamaIE,
|
||||
@@ -673,6 +684,12 @@
|
||||
IqIE,
|
||||
IqAlbumIE
|
||||
)
|
||||
|
||||
from .itprotv import (
|
||||
ITProTVIE,
|
||||
ITProTVCourseIE
|
||||
)
|
||||
|
||||
from .itv import (
|
||||
ITVIE,
|
||||
ITVBTCCIE,
|
||||
@@ -684,6 +701,10 @@
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jable import (
|
||||
JableIE,
|
||||
JablePlaylistIE,
|
||||
)
|
||||
from .jamendo import (
|
||||
JamendoIE,
|
||||
JamendoAlbumIE,
|
||||
@@ -730,6 +751,11 @@
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
LastFMUserIE,
|
||||
)
|
||||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
@@ -894,6 +920,7 @@
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
@@ -1152,6 +1179,11 @@
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .panopto import (
|
||||
PanoptoIE,
|
||||
PanoptoListIE,
|
||||
PanoptoPlaylistIE
|
||||
)
|
||||
from .paramountplus import (
|
||||
ParamountPlusIE,
|
||||
ParamountPlusSeriesIE,
|
||||
@@ -1220,6 +1252,7 @@
|
||||
from .pokemon import (
|
||||
PokemonIE,
|
||||
PokemonWatchIE,
|
||||
PokemonSoundLibraryIE,
|
||||
)
|
||||
from .pokergo import (
|
||||
PokerGoIE,
|
||||
@@ -1955,6 +1988,11 @@
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wasdtv import (
|
||||
WASDTVStreamIE,
|
||||
WASDTVRecordIE,
|
||||
WASDTVClipIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchbox import WatchBoxIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
@@ -2085,18 +2123,17 @@
|
||||
EWETVIE,
|
||||
GlattvisionTVIE,
|
||||
MNetTVIE,
|
||||
MyVisionTVIE,
|
||||
NetPlusIE,
|
||||
OsnatelTVIE,
|
||||
QuantumTVIE,
|
||||
QuicklineIE,
|
||||
QuicklineLiveIE,
|
||||
SaltTVIE,
|
||||
SAKTVIE,
|
||||
VTXTVIE,
|
||||
WalyTVIE,
|
||||
ZattooIE,
|
||||
ZattooLiveIE,
|
||||
ZattooMoviesIE,
|
||||
ZattooRecordingsIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zee5 import (
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
@@ -328,11 +329,7 @@ def _extract_urls(webpage):
|
||||
urls.append(mobj.group('url'))
|
||||
return urls
|
||||
|
||||
def _login(self):
|
||||
useremail, password = self._get_login_info()
|
||||
if useremail is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page_req = sanitized_Request(self._LOGIN_URL)
|
||||
self._set_cookie('facebook.com', 'locale', 'en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
@@ -344,7 +341,7 @@ def _login(self):
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
|
||||
|
||||
login_form = {
|
||||
'email': useremail,
|
||||
'email': username,
|
||||
'pass': password,
|
||||
'lsd': lsd,
|
||||
'lgnrnd': lgnrnd,
|
||||
@@ -391,9 +388,6 @@ def _login(self):
|
||||
self.report_warning('unable to log in: %s' % error_to_compat_str(err))
|
||||
return
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _extract_from_url(self, url, video_id):
|
||||
webpage = self._download_webpage(
|
||||
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
|
||||
@@ -403,13 +397,13 @@ def extract_metadata(webpage):
|
||||
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
|
||||
if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
|
||||
title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
|
||||
description = traverse_obj(media, (
|
||||
..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
|
||||
uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
|
||||
or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})
|
||||
media = traverse_obj(
|
||||
post,
|
||||
(..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'),
|
||||
expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
|
||||
@@ -49,30 +49,26 @@ class FancodeVodIE(InfoExtractor):
|
||||
'referer': 'https://fancode.com',
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
def _perform_login(self, username, password):
|
||||
# Access tokens are shortlived, so get them using the refresh token.
|
||||
username, password = self._get_login_info()
|
||||
if username == 'refresh' and password is not None:
|
||||
self.report_login()
|
||||
data = '''{
|
||||
"query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
|
||||
"variables":{
|
||||
"refreshToken":"%s"
|
||||
},
|
||||
"operationName":"RefreshToken"
|
||||
}''' % password
|
||||
|
||||
token_json = self.download_gql('refresh token', data, "Getting the Access token")
|
||||
self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
|
||||
if self._ACCESS_TOKEN is None:
|
||||
self.report_warning('Failed to get Access token')
|
||||
else:
|
||||
self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
|
||||
elif username is not None:
|
||||
if username != 'refresh':
|
||||
self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self.report_login()
|
||||
data = '''{
|
||||
"query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
|
||||
"variables":{
|
||||
"refreshToken":"%s"
|
||||
},
|
||||
"operationName":"RefreshToken"
|
||||
}''' % password
|
||||
|
||||
token_json = self.download_gql('refresh token', data, "Getting the Access token")
|
||||
self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
|
||||
if self._ACCESS_TOKEN is None:
|
||||
self.report_warning('Failed to get Access token')
|
||||
else:
|
||||
self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
|
||||
|
||||
def _check_login_required(self, is_available, is_premium):
|
||||
msg = None
|
||||
|
||||
@@ -212,7 +212,6 @@ def _real_extract(self, url):
|
||||
'Accept': '*/*',
|
||||
'User-Agent': std_headers['User-Agent'],
|
||||
})
|
||||
ws.__enter__()
|
||||
|
||||
self.write_debug('[debug] Sending HLS server request')
|
||||
|
||||
|
||||
@@ -75,8 +75,7 @@ def _real_extract(self, url):
|
||||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||
webpage, 'video url')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, 'duration', default=None))
|
||||
|
||||
|
||||
@@ -29,8 +29,7 @@ def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
|
||||
title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com')
|
||||
description = get_element_by_id('inf_tit', webpage)
|
||||
|
||||
# The default user-agent with foxgay cookies leads to pages without videos
|
||||
|
||||
@@ -7,12 +7,14 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FptplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
|
||||
_VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
|
||||
_GEO_COUNTRIES = ['VN']
|
||||
IE_NAME = 'fptplay'
|
||||
IE_DESC = 'fptplay.vn'
|
||||
@@ -22,7 +24,7 @@ class FptplayIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '621a123016f369ebbde55945',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
|
||||
'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
|
||||
'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
|
||||
},
|
||||
}, {
|
||||
@@ -31,25 +33,42 @@ class FptplayIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '61f3aa8a6b3b1d2e73c60eb5',
|
||||
'ext': 'mp4',
|
||||
'title': 'Má Tôi Là Đại Gia - 3',
|
||||
'title': 'Má Tôi Là Đại Gia - Tập 3',
|
||||
'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
|
||||
'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
|
||||
'info_dict': {
|
||||
'id': '6222d9684ec7230fa6e627a2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lạp Tội Đồ Giám - Tập 2B',
|
||||
'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
|
||||
webpage = self._download_webpage(url, video_id=video_id, fatal=False)
|
||||
info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
|
||||
video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
|
||||
webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
|
||||
title = self._search_regex(
|
||||
r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
|
||||
real_episode = slug_episode if not title else self._search_regex(
|
||||
r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
|
||||
title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
|
||||
|
||||
info = self._download_json(
|
||||
self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': join_nonempty(
|
||||
self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'title': join_nonempty(title, real_episode, delim=' - '),
|
||||
'description': (
|
||||
clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
|
||||
or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -1,18 +1,45 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
# playlist
|
||||
'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': 'hasta-dente',
|
||||
'title': 'Hasta Dente',
|
||||
'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20201024',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
|
||||
'ext': 'mp3',
|
||||
'title': 'Jeudi, vous avez dit bizarre ?',
|
||||
'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
|
||||
'duration': 604,
|
||||
'upload_date': '20201024',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1603576680
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
'display_id': 'rendez-vous-au-pays-des-geeks',
|
||||
@@ -20,9 +47,9 @@ class FranceCultureIE(InfoExtractor):
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
'duration': 3569,
|
||||
},
|
||||
}, {
|
||||
# no thumbnail
|
||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||
@@ -31,9 +58,54 @@ class FranceCultureIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
info = {
|
||||
'id': display_id,
|
||||
'title': self._html_search_regex(
|
||||
r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=self._og_search_title(webpage)),
|
||||
'description': self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': self._html_search_regex(
|
||||
r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
|
||||
'upload_date': unified_strdate(self._html_search_regex(
|
||||
r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
|
||||
}
|
||||
|
||||
playlist_data = self._search_regex(
|
||||
r'''(?sx)
|
||||
<section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
|
||||
(.*?)
|
||||
</section>
|
||||
''',
|
||||
webpage, 'playlist data', fatal=False, default=None)
|
||||
|
||||
if playlist_data:
|
||||
entries = []
|
||||
for item, item_description in re.findall(
|
||||
r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
|
||||
playlist_data):
|
||||
|
||||
item_attributes = extract_attributes(item)
|
||||
entries.append({
|
||||
'id': item_attributes.get('data-emission-uuid'),
|
||||
'url': item_attributes.get('data-url'),
|
||||
'title': item_attributes.get('data-diffusion-title'),
|
||||
'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')),
|
||||
'description': item_description,
|
||||
'timestamp': int_or_none(item_attributes.get('data-start-time')),
|
||||
'thumbnail': info['thumbnail'],
|
||||
'uploader': info['uploader'],
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
**info
|
||||
}
|
||||
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'''(?sx)
|
||||
(?:
|
||||
@@ -43,31 +115,14 @@ def _real_extract(self, url):
|
||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||
''',
|
||||
webpage, 'video data'))
|
||||
|
||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||
webpage, 'thumbnail', default=None)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
|
||||
ext = determine_ext(video_url.lower())
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if ext == 'mp3' else None,
|
||||
'uploader': uploader,
|
||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||
'duration': int_or_none(video_data.get('data-duration')),
|
||||
**info
|
||||
}
|
||||
|
||||
@@ -187,7 +187,7 @@ def _extract_video(self, video_id, catalogue=None):
|
||||
'protocol': 'mhtml',
|
||||
'url': 'about:invalid',
|
||||
'fragments': [{
|
||||
'path': sheet,
|
||||
'url': sheet,
|
||||
# XXX: not entirely accurate; each spritesheet seems to be
|
||||
# a 10×10 grid of thumbnails corresponding to approximately
|
||||
# 2 seconds of the video; the last spritesheet may be shorter
|
||||
|
||||
@@ -28,14 +28,7 @@ class FrontendMastersBaseIE(InfoExtractor):
|
||||
'high': {'width': 1920, 'height': 1080}
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
|
||||
@@ -36,9 +36,8 @@ def _get_region(self):
|
||||
note='Checking geo-location', errnote='Unable to fetch geo-location information'),
|
||||
'region') or 'US'
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
def _perform_login(self, username, password):
|
||||
if self._TOKEN:
|
||||
return
|
||||
try:
|
||||
data = self._download_json(
|
||||
@@ -47,7 +46,7 @@ def _login(self):
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))
|
||||
return data['token']
|
||||
FunimationBaseIE._TOKEN = data['token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read().decode(), None)['error']
|
||||
@@ -90,8 +89,6 @@ class FunimationPageIE(FunimationBaseIE):
|
||||
def _real_initialize(self):
|
||||
if not self._REGION:
|
||||
FunimationBaseIE._REGION = self._get_region()
|
||||
if not self._TOKEN:
|
||||
FunimationBaseIE._TOKEN = self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')
|
||||
@@ -154,10 +151,6 @@ class FunimationIE(FunimationBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._TOKEN:
|
||||
FunimationBaseIE._TOKEN = self._login()
|
||||
|
||||
@staticmethod
|
||||
def _get_experiences(episode):
|
||||
for lang, lang_data in episode.get('languages', {}).items():
|
||||
@@ -340,7 +333,7 @@ def _real_extract(self, url):
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
|
||||
% show_info.get('id'), display_id)
|
||||
|
||||
vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item'))
|
||||
vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user