mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-18 15:15:42 +07:00
Compare commits
96 Commits
2025.10.22
...
release
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa220d0aaa | ||
|
|
7a52ff29d8 | ||
|
|
0c7e4cfcae | ||
|
|
29fe515d8d | ||
|
|
1d43fa5af8 | ||
|
|
fa16dc5241 | ||
|
|
04050be583 | ||
|
|
7bd79d9296 | ||
|
|
29e2570378 | ||
|
|
c70b57c03e | ||
|
|
025191fea6 | ||
|
|
36b29bb353 | ||
|
|
7ec6b9bc40 | ||
|
|
f7acf3c1f4 | ||
|
|
017d76edcf | ||
|
|
56ea3a00ea | ||
|
|
2a777ecbd5 | ||
|
|
023e4db9af | ||
|
|
4433b3a217 | ||
|
|
419776ecf5 | ||
|
|
2801650268 | ||
|
|
26c2545b87 | ||
|
|
12d411722a | ||
|
|
e564b4a808 | ||
|
|
715af0c636 | ||
|
|
0c696239ef | ||
|
|
3cb5e4db54 | ||
|
|
6842620d56 | ||
|
|
20f83f208e | ||
|
|
c2e7e9cdb2 | ||
|
|
2c9f0c3456 | ||
|
|
0eed3fe530 | ||
|
|
a4c72acc46 | ||
|
|
9daba4f442 | ||
|
|
854fded114 | ||
|
|
5f66ac71f6 | ||
|
|
4cb5e191ef | ||
|
|
6ee6a6fc58 | ||
|
|
23f1ab3469 | ||
|
|
af285016d2 | ||
|
|
1dd84b9d1c | ||
|
|
b333ef1b3f | ||
|
|
4e680db150 | ||
|
|
45a3b42bb9 | ||
|
|
d6aa8c235d | ||
|
|
947e788340 | ||
|
|
5977782142 | ||
|
|
335653be82 | ||
|
|
bf7e04e9d8 | ||
|
|
cb78440e46 | ||
|
|
b46c572b26 | ||
|
|
7eff676183 | ||
|
|
1ac7e6005c | ||
|
|
f3597cfafc | ||
|
|
3ef867451c | ||
|
|
ade8c2b36f | ||
|
|
19c5d7c530 | ||
|
|
e6414d64e7 | ||
|
|
c96e9291ab | ||
|
|
4b4223b436 | ||
|
|
c63b4e2a2b | ||
|
|
f3c255b63b | ||
|
|
646904cd3a | ||
|
|
a0bda3b786 | ||
|
|
228ae9f0f2 | ||
|
|
f87cfadb5c | ||
|
|
a1d6351c3f | ||
|
|
a86eeaadf2 | ||
|
|
5dde0d0c9f | ||
|
|
5767fb4ab1 | ||
|
|
73fd850d17 | ||
|
|
8636a9bac3 | ||
|
|
7af6d81f35 | ||
|
|
a56217f9f6 | ||
|
|
afc44022d0 | ||
|
|
1d2f0edaf9 | ||
|
|
fa35eb27ea | ||
|
|
ffb7b7f446 | ||
|
|
61cf34f544 | ||
|
|
e8a6b1ca92 | ||
|
|
10dea209d2 | ||
|
|
52f3c56e83 | ||
|
|
79f9232ffb | ||
|
|
ee98be4ad7 | ||
|
|
c0c9f30695 | ||
|
|
cacd1630a1 | ||
|
|
6224a38988 | ||
|
|
d6ee677253 | ||
|
|
0046fbcbfc | ||
|
|
ee3a106f34 | ||
|
|
6d05cee4df | ||
|
|
1c2ad94353 | ||
|
|
808b1fed76 | ||
|
|
73922e66e4 | ||
|
|
d9e3011fd1 | ||
|
|
70f1098312 |
6
.github/actionlint.yml
vendored
6
.github/actionlint.yml
vendored
@@ -1,9 +1,3 @@
|
||||
self-hosted-runner:
|
||||
labels:
|
||||
# Workaround for the outdated runner list in actionlint v1.7.7
|
||||
# Ref: https://github.com/rhysd/actionlint/issues/533
|
||||
- windows-11-arm
|
||||
|
||||
config-variables:
|
||||
- KEEP_CACHE_WARM
|
||||
- PUSH_VERSION_COMMIT
|
||||
|
||||
40
.github/workflows/build.yml
vendored
40
.github/workflows/build.yml
vendored
@@ -153,10 +153,12 @@ jobs:
|
||||
'os': 'musllinux',
|
||||
'arch': 'x86_64',
|
||||
'runner': 'ubuntu-24.04',
|
||||
'python_version': '3.14',
|
||||
}, {
|
||||
'os': 'musllinux',
|
||||
'arch': 'aarch64',
|
||||
'runner': 'ubuntu-24.04-arm',
|
||||
'python_version': '3.14',
|
||||
}],
|
||||
}
|
||||
INPUTS = json.loads(os.environ['INPUTS'])
|
||||
@@ -194,7 +196,7 @@ jobs:
|
||||
UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # Needed for changelog
|
||||
|
||||
@@ -214,7 +216,7 @@ jobs:
|
||||
|
||||
- name: Build Unix platform-independent binary
|
||||
run: |
|
||||
make all tar
|
||||
make all-extra tar
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
@@ -255,7 +257,7 @@ jobs:
|
||||
SKIP_ONEFILE_BUILD: ${{ (!matrix.onefile && '1') || '' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Cache requirements
|
||||
if: matrix.cache_requirements
|
||||
@@ -318,7 +320,7 @@ jobs:
|
||||
UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
# NB: Building universal2 does not work with python from actions/setup-python
|
||||
|
||||
- name: Cache requirements
|
||||
@@ -341,14 +343,14 @@ jobs:
|
||||
brew uninstall --ignore-dependencies python3
|
||||
python3 -m venv ~/yt-dlp-build-venv
|
||||
source ~/yt-dlp-build-venv/bin/activate
|
||||
python3 devscripts/install_deps.py -o --include build
|
||||
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
|
||||
python3 devscripts/install_deps.py --omit-default --include-extra build
|
||||
python3 devscripts/install_deps.py --print --include-extra pyinstaller > requirements.txt
|
||||
# We need to ignore wheels otherwise we break universal2 builds
|
||||
python3 -m pip install -U --no-binary :all: -r requirements.txt
|
||||
# We need to fuse our own universal2 wheels for curl_cffi
|
||||
python3 -m pip install -U 'delocate==0.11.0'
|
||||
mkdir curl_cffi_whls curl_cffi_universal2
|
||||
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
|
||||
python3 devscripts/install_deps.py --print --omit-default --include-extra curl-cffi > requirements.txt
|
||||
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
|
||||
python3 -m pip download \
|
||||
--only-binary=:all: \
|
||||
@@ -420,23 +422,23 @@ jobs:
|
||||
runner: windows-2025
|
||||
python_version: '3.10'
|
||||
platform_tag: win_amd64
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: b6496c7630c3afe66900cfa824e8234a8c2e2c81704bd7facd79586abc76c0e5
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: e28cc13e4ad0cc74330d832202806d0c1976e9165da6047309348ca663c0ed3d
|
||||
- arch: 'x86'
|
||||
runner: windows-2025
|
||||
python_version: '3.10'
|
||||
platform_tag: win32
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: 2d881843580efdc54f3523507fc6d9c5b6051ee49c743a6d9b7003ac5758c226
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: c00f600c17de3bdd589f043f60ab64fc34fcba6dd902ad973af9c8afc74f80d1
|
||||
- arch: 'arm64'
|
||||
runner: windows-11-arm
|
||||
python_version: '3.13' # arm64 only has Python >= 3.11 available
|
||||
platform_tag: win_arm64
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: 4250c9085e34a95c898f3ee2f764914fc36ec59f0d97c28e6a75fcf21f7b144f
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: a2033b18b4f7bc6108b5fd76a92c6c1de0a12ec4fe98a23396a9f978cb4b7d7b
|
||||
env:
|
||||
CHANNEL: ${{ inputs.channel }}
|
||||
ORIGIN: ${{ needs.process.outputs.origin }}
|
||||
@@ -448,7 +450,7 @@ jobs:
|
||||
PYI_WHEEL: pyinstaller-${{ matrix.pyi_version }}-py3-none-${{ matrix.platform_tag }}.whl
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
@@ -482,11 +484,11 @@ jobs:
|
||||
mkdir /pyi-wheels
|
||||
python -m pip download -d /pyi-wheels --no-deps --require-hashes "pyinstaller@${Env:PYI_URL}#sha256=${Env:PYI_HASH}"
|
||||
python -m pip install --force-reinstall -U "/pyi-wheels/${Env:PYI_WHEEL}"
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --omit-default --include-extra build
|
||||
if ("${Env:ARCH}" -eq "x86") {
|
||||
python devscripts/install_deps.py
|
||||
} else {
|
||||
python devscripts/install_deps.py --include curl-cffi
|
||||
python devscripts/install_deps.py --include-extra curl-cffi
|
||||
}
|
||||
|
||||
- name: Prepare
|
||||
|
||||
77
.github/workflows/challenge-tests.yml
vendored
Normal file
77
.github/workflows/challenge-tests.yml
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
name: Challenge Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/challenge-tests.yml
|
||||
- test/test_jsc/*.py
|
||||
- yt_dlp/extractor/youtube/jsc/**.js
|
||||
- yt_dlp/extractor/youtube/jsc/**.py
|
||||
- yt_dlp/extractor/youtube/pot/**.py
|
||||
- yt_dlp/utils/_jsruntime.py
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/challenge-tests.yml
|
||||
- test/test_jsc/*.py
|
||||
- yt_dlp/extractor/youtube/jsc/**.js
|
||||
- yt_dlp/extractor/youtube/jsc/**.py
|
||||
- yt_dlp/extractor/youtube/pot/**.py
|
||||
- yt_dlp/utils/_jsruntime.py
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: challenge-tests-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
name: Challenge Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
|
||||
env:
|
||||
QJS_VERSION: '2025-04-26' # Earliest version with rope strings
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install Deno
|
||||
uses: denoland/setup-deno@v2
|
||||
with:
|
||||
deno-version: '2.0.0' # minimum supported version
|
||||
- name: Install Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
# minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0
|
||||
bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }}
|
||||
- name: Install Node
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: '20.0' # minimum supported version
|
||||
- name: Install QuickJS (Linux)
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip
|
||||
unzip quickjs.zip qjs
|
||||
sudo install qjs /usr/local/bin/qjs
|
||||
- name: Install QuickJS (Windows)
|
||||
if: matrix.os == 'windows-latest'
|
||||
shell: pwsh
|
||||
run: |
|
||||
Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip
|
||||
unzip quickjs.zip
|
||||
- name: Install test requirements
|
||||
run: |
|
||||
python ./devscripts/install_deps.py --print --omit-default --include-extra test > requirements.txt
|
||||
python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt
|
||||
python -m pip install -U -r requirements.txt
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true
|
||||
python ./devscripts/run_tests.py test/test_jsc -k download
|
||||
40
.github/workflows/codeql.yml
vendored
40
.github/workflows/codeql.yml
vendored
@@ -2,7 +2,7 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ 'master', 'gh-pages', 'release' ]
|
||||
branches: [ 'master' ]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ 'master' ]
|
||||
@@ -11,7 +11,7 @@ on:
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
name: Analyze (${{ matrix.language }})
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
@@ -21,45 +21,19 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Use only 'java' to analyze code written in Java, Kotlin or both
|
||||
# Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
|
||||
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
|
||||
language: [ 'actions', 'javascript-typescript', 'python' ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
|
||||
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||
# queries: security-extended,security-and-quality
|
||||
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
|
||||
# If the Autobuild fails above, remove it and uncomment the following three lines.
|
||||
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
|
||||
|
||||
# - run: |
|
||||
# echo "Run, Build Application using script"
|
||||
# ./location_of_script_within_repo/buildscript.sh
|
||||
build-mode: none
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
uses: github/codeql-action/analyze@v4
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
|
||||
29
.github/workflows/core.yml
vendored
29
.github/workflows/core.yml
vendored
@@ -7,6 +7,7 @@ on:
|
||||
- test/**
|
||||
- yt_dlp/**.py
|
||||
- '!yt_dlp/extractor/**.py'
|
||||
- yt_dlp/extractor/youtube/**.py
|
||||
- yt_dlp/extractor/__init__.py
|
||||
- yt_dlp/extractor/common.py
|
||||
- yt_dlp/extractor/extractors.py
|
||||
@@ -17,6 +18,7 @@ on:
|
||||
- test/**
|
||||
- yt_dlp/**.py
|
||||
- '!yt_dlp/extractor/**.py'
|
||||
- yt_dlp/extractor/youtube/**.py
|
||||
- yt_dlp/extractor/__init__.py
|
||||
- yt_dlp/extractor/common.py
|
||||
- yt_dlp/extractor/extractors.py
|
||||
@@ -53,16 +55,37 @@ jobs:
|
||||
- os: windows-latest
|
||||
python-version: pypy-3.11
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
|
||||
run: python ./devscripts/install_deps.py --include-extra test --include-extra curl-cffi
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
continue-on-error: False
|
||||
env:
|
||||
source: ${{ (github.event_name == 'push' && github.event.before) || 'origin/master' }}
|
||||
target: ${{ (github.event_name == 'push' && github.event.after) || 'HEAD' }}
|
||||
shell: bash
|
||||
run: |
|
||||
flags=()
|
||||
# Check if a networking file is involved
|
||||
patterns="\
|
||||
^yt_dlp/networking/
|
||||
^yt_dlp/utils/networking\.py$
|
||||
^test/test_http_proxy\.py$
|
||||
^test/test_networking\.py$
|
||||
^test/test_networking_utils\.py$
|
||||
^test/test_socks\.py$
|
||||
^test/test_websockets\.py$
|
||||
^pyproject\.toml$
|
||||
"
|
||||
if git diff --name-only "${source}" "${target}" | grep -Ef <(printf '%s' "${patterns}"); then
|
||||
flags+=(--flaky)
|
||||
fi
|
||||
python3 -m yt_dlp -v || true # Print debug head
|
||||
python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
|
||||
python3 -m devscripts.run_tests "${flags[@]}" --pytest-args '--reruns 2 --reruns-delay 3.0' core
|
||||
|
||||
12
.github/workflows/download.yml
vendored
12
.github/workflows/download.yml
vendored
@@ -9,16 +9,16 @@ jobs:
|
||||
if: "contains(github.event.head_commit.message, 'ci run dl')"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
run: python ./devscripts/install_deps.py --include-extra dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
run: python ./devscripts/run_tests.py download
|
||||
|
||||
full:
|
||||
name: Full Download Tests
|
||||
@@ -36,13 +36,13 @@ jobs:
|
||||
- os: windows-latest
|
||||
python-version: pypy-3.11
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
run: python ./devscripts/install_deps.py --include-extra dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
run: python ./devscripts/run_tests.py download
|
||||
|
||||
10
.github/workflows/quick-test.yml
vendored
10
.github/workflows/quick-test.yml
vendored
@@ -9,13 +9,13 @@ jobs:
|
||||
if: "!contains(github.event.head_commit.message, 'ci skip all')"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python 3.10
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py -o --include test
|
||||
run: python ./devscripts/install_deps.py --omit-default --include-extra test
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
@@ -26,14 +26,14 @@ jobs:
|
||||
if: "!contains(github.event.head_commit.message, 'ci skip all')"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install dev dependencies
|
||||
run: python3 ./devscripts/install_deps.py -o --include static-analysis
|
||||
run: python ./devscripts/install_deps.py --omit-default --include-extra static-analysis
|
||||
- name: Make lazy extractors
|
||||
run: python3 ./devscripts/make_lazy_extractors.py
|
||||
run: python ./devscripts/make_lazy_extractors.py
|
||||
- name: Run ruff
|
||||
run: ruff check --output-format github .
|
||||
- name: Run autopep8
|
||||
|
||||
2
.github/workflows/release-nightly.yml
vendored
2
.github/workflows/release-nightly.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
outputs:
|
||||
commit: ${{ steps.check_for_new_commits.outputs.commit }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Check for new commits
|
||||
|
||||
13
.github/workflows/release.yml
vendored
13
.github/workflows/release.yml
vendored
@@ -75,7 +75,7 @@ jobs:
|
||||
head_sha: ${{ steps.get_target.outputs.head_sha }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
id-token: write # mandatory for trusted publishing
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/setup-python@v6
|
||||
@@ -180,7 +180,7 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install pandoc man
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --omit-default --include-extra build
|
||||
|
||||
- name: Prepare
|
||||
env:
|
||||
@@ -233,7 +233,7 @@ jobs:
|
||||
VERSION: ${{ needs.prepare.outputs.version }}
|
||||
HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/download-artifact@v5
|
||||
@@ -269,9 +269,10 @@ jobs:
|
||||
"[]" \
|
||||
"(https://github.com/${MASTER_REPO}/releases/latest \"Master builds\")" >> ./RELEASE_NOTES
|
||||
fi
|
||||
printf '\n\n%s\n\n%s%s\n\n---\n' \
|
||||
printf '\n\n%s\n\n%s%s%s\n\n---\n' \
|
||||
"#### A description of the various files is in the [README](https://github.com/${REPOSITORY}#release-files)" \
|
||||
"The PyInstaller-bundled executables are subject to the licenses described in " \
|
||||
"The zipimport Unix executable contains code licensed under ISC and MIT. " \
|
||||
"The PyInstaller-bundled executables are subject to these and other licenses, all of which are compiled in " \
|
||||
"[THIRD_PARTY_LICENSES.txt](https://github.com/${BASE_REPO}/blob/${HEAD_SHA}/THIRD_PARTY_LICENSES.txt)" >> ./RELEASE_NOTES
|
||||
python ./devscripts/make_changelog.py -vv --collapsible >> ./RELEASE_NOTES
|
||||
printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES
|
||||
|
||||
41
.github/workflows/signature-tests.yml
vendored
41
.github/workflows/signature-tests.yml
vendored
@@ -1,41 +0,0 @@
|
||||
name: Signature Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: signature-tests-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
name: Signature Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --only-optional --include test
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true # Print debug head
|
||||
python3 ./devscripts/run_tests.py test/test_youtube_signature.py
|
||||
8
.github/workflows/test-workflows.yml
vendored
8
.github/workflows/test-workflows.yml
vendored
@@ -17,8 +17,8 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
ACTIONLINT_VERSION: "1.7.7"
|
||||
ACTIONLINT_SHA256SUM: 023070a287cd8cccd71515fedc843f1985bf96c436b7effaecce67290e7e0757
|
||||
ACTIONLINT_VERSION: "1.7.9"
|
||||
ACTIONLINT_SHA256SUM: 233b280d05e100837f4af1433c7b40a5dcb306e3aa68fb4f17f8a7f45a7df7b4
|
||||
ACTIONLINT_REPO: https://github.com/rhysd/actionlint
|
||||
|
||||
jobs:
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
name: Check workflows
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.10" # Keep this in sync with release.yml's prepare job
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
env:
|
||||
ACTIONLINT_TARBALL: ${{ format('actionlint_{0}_linux_amd64.tar.gz', env.ACTIONLINT_VERSION) }}
|
||||
run: |
|
||||
python -m devscripts.install_deps -o --include test
|
||||
python -m devscripts.install_deps --omit-default --include-extra test
|
||||
sudo apt -y install shellcheck
|
||||
python -m pip install -U pyflakes
|
||||
curl -LO "${ACTIONLINT_REPO}/releases/download/v${ACTIONLINT_VERSION}/${ACTIONLINT_TARBALL}"
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -107,6 +107,7 @@ README.txt
|
||||
test/testdata/sigs/player-*.js
|
||||
test/testdata/thumbnails/empty.webp
|
||||
test/testdata/thumbnails/foo\ %d\ bar/foo_%d.*
|
||||
.ejs-*
|
||||
|
||||
# Binary
|
||||
/youtube-dl
|
||||
@@ -129,3 +130,6 @@ yt-dlp.zip
|
||||
# Plugins
|
||||
ytdlp_plugins/
|
||||
yt-dlp-plugins
|
||||
|
||||
# Packages
|
||||
yt_dlp_ejs/
|
||||
|
||||
@@ -177,7 +177,7 @@ # DEVELOPER INSTRUCTIONS
|
||||
|
||||
```shell
|
||||
# To only install development dependencies:
|
||||
$ python -m devscripts.install_deps --include dev
|
||||
$ python -m devscripts.install_deps --include-extra dev
|
||||
|
||||
# Or, for an editable install plus dev dependencies:
|
||||
$ python -m pip install -e ".[default,dev]"
|
||||
@@ -763,7 +763,7 @@ ### Use convenience conversion and parsing functions
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
|
||||
Use `traverse_obj` and `try_call` (supersedes `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
|
||||
25
CONTRIBUTORS
25
CONTRIBUTORS
@@ -818,3 +818,28 @@ robin-mu
|
||||
shssoichiro
|
||||
thanhtaivtt
|
||||
uoag
|
||||
CaramelConnoisseur
|
||||
ctengel
|
||||
einstein95
|
||||
evilpie
|
||||
i3p9
|
||||
JrM2628
|
||||
krystophny
|
||||
matyb08
|
||||
pha1n0q
|
||||
PierceLBrooks
|
||||
TheQWERTYCodr
|
||||
thomasmllt
|
||||
w4grfw
|
||||
WeidiDeng
|
||||
Zer0spectrum
|
||||
0xvd
|
||||
1bnBattuta
|
||||
beliote
|
||||
darkstar
|
||||
Haytam001
|
||||
mrFlamel
|
||||
oxyzenQ
|
||||
putridambassador121
|
||||
RezSat
|
||||
WhatAmISupposedToPutHere
|
||||
|
||||
123
Changelog.md
123
Changelog.md
@@ -4,6 +4,129 @@ # Changelog
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.12.08
|
||||
|
||||
#### Core changes
|
||||
- [Respect `PATHEXT` when locating JS runtime on Windows](https://github.com/yt-dlp/yt-dlp/commit/e564b4a8080cff48fa0c28f20272c05085ee6130) ([#15117](https://github.com/yt-dlp/yt-dlp/issues/15117)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **cookies**: [Fix `--cookies-from-browser` for new installs of Firefox 147+](https://github.com/yt-dlp/yt-dlp/commit/fa16dc5241ac1552074feee48e1c2605dc36d352) ([#15215](https://github.com/yt-dlp/yt-dlp/issues/15215)) by [bashonly](https://github.com/bashonly), [mbway](https://github.com/mbway)
|
||||
|
||||
#### Extractor changes
|
||||
- **agalega**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3cb5e4db54d44fe82d4eee94ae2f37cbce2e7dfc) ([#15105](https://github.com/yt-dlp/yt-dlp/issues/15105)) by [putridambassador121](https://github.com/putridambassador121)
|
||||
- **alibaba**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c70b57c03e0c25767a5166620798297a2a4878fb) ([#15253](https://github.com/yt-dlp/yt-dlp/issues/15253)) by [seproDev](https://github.com/seproDev)
|
||||
- **bitmovin**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/45a3b42bb917e99b0b5c155c272ebf4a82a5bf66) ([#15064](https://github.com/yt-dlp/yt-dlp/issues/15064)) by [seproDev](https://github.com/seproDev)
|
||||
- **digiteka**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/6842620d56e4c4e6affb90c2f8dff8a36dee852c) ([#14903](https://github.com/yt-dlp/yt-dlp/issues/14903)) by [beliote](https://github.com/beliote)
|
||||
- **fc2**: live: [Raise appropriate error when stream is offline](https://github.com/yt-dlp/yt-dlp/commit/4433b3a217c9f430dc057643bfd7b6769eff4a45) ([#15180](https://github.com/yt-dlp/yt-dlp/issues/15180)) by [Zer0spectrum](https://github.com/Zer0spectrum)
|
||||
- **floatplane**: [Add subtitle support](https://github.com/yt-dlp/yt-dlp/commit/b333ef1b3f961e292a8bf7052c54b54c81587a17) ([#15069](https://github.com/yt-dlp/yt-dlp/issues/15069)) by [seproDev](https://github.com/seproDev)
|
||||
- **jtbc**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/947e7883406e5ea43687d6e4ff721cc0162c9664) ([#15047](https://github.com/yt-dlp/yt-dlp/issues/15047)) by [seproDev](https://github.com/seproDev)
|
||||
- **loom**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/36b29bb3532e008a2aaf3d36d1c6fc3944137930) ([#15236](https://github.com/yt-dlp/yt-dlp/issues/15236)) by [bashonly](https://github.com/bashonly)
|
||||
- **mave**: channel: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5f66ac71f6637f768cd251509b0a932d0ce56427) ([#14915](https://github.com/yt-dlp/yt-dlp/issues/14915)) by [anlar](https://github.com/anlar)
|
||||
- **medaltv**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/a4c72acc462668a938827370bd77084a1cd4733b) ([#15103](https://github.com/yt-dlp/yt-dlp/issues/15103)) by [seproDev](https://github.com/seproDev)
|
||||
- **netapp**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/20f83f208eae863250b35e2761adad88e91d85a1) ([#15122](https://github.com/yt-dlp/yt-dlp/issues/15122)) by [darkstar](https://github.com/darkstar)
|
||||
- **nhk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/12d411722a3d7a0382d1d230a904ecd4e20298b6) ([#14528](https://github.com/yt-dlp/yt-dlp/issues/14528)) by [garret1317](https://github.com/garret1317)
|
||||
- **nowcanal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/4e680db1505dafb93313b1d42ffcd3f230fcc92a) ([#14584](https://github.com/yt-dlp/yt-dlp/issues/14584)) by [pferreir](https://github.com/pferreir)
|
||||
- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/023e4db9afe0630c608621846856a1ca876d8bab) ([#15108](https://github.com/yt-dlp/yt-dlp/issues/15108)) by [thomasmllt](https://github.com/thomasmllt)
|
||||
- **rinsefm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/d6aa8c235d2e7d9374f79ec73af23a3859c76bea) ([#15020](https://github.com/yt-dlp/yt-dlp/issues/15020)) by [1bnBattuta](https://github.com/1bnBattuta), [seproDev](https://github.com/seproDev)
|
||||
- **s4c**: [Fix geo-restricted content](https://github.com/yt-dlp/yt-dlp/commit/26c2545b87e2b22f134d1f567ed4d4b0b91c3253) ([#15196](https://github.com/yt-dlp/yt-dlp/issues/15196)) by [seproDev](https://github.com/seproDev)
|
||||
- **soundcloudplaylist**: [Support new API URLs](https://github.com/yt-dlp/yt-dlp/commit/1dd84b9d1c33e50de49866b0d93c2596897ce506) ([#15071](https://github.com/yt-dlp/yt-dlp/issues/15071)) by [seproDev](https://github.com/seproDev)
|
||||
- **sporteurope**: [Support new domain](https://github.com/yt-dlp/yt-dlp/commit/025191fea655ac879ca6dc68df358c26456a6e46) ([#15251](https://github.com/yt-dlp/yt-dlp/issues/15251)) by [bashonly](https://github.com/bashonly)
|
||||
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2c9f0c3456057aff0631d9ea6d3eda70ffd8aabe) ([#15113](https://github.com/yt-dlp/yt-dlp/issues/15113)) by [bashonly](https://github.com/bashonly)
|
||||
- **thechosen**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/854fded114f3b7b33693c2d3418575d04014aa4b) ([#14183](https://github.com/yt-dlp/yt-dlp/issues/14183)) by [mrFlamel](https://github.com/mrFlamel)
|
||||
- **thisoldhouse**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/9daba4f442139ee2537746398afc5ac30b51c28c) ([#15097](https://github.com/yt-dlp/yt-dlp/issues/15097)) by [bashonly](https://github.com/bashonly)
|
||||
- **tubitv**: series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2a777ecbd598de19a4c691ba1f790ccbec9cdbc4) ([#15018](https://github.com/yt-dlp/yt-dlp/issues/15018)) by [Zer0spectrum](https://github.com/Zer0spectrum)
|
||||
- **urplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c2e7e9cdb2261adde01048d161914b156a3bad51) ([#15120](https://github.com/yt-dlp/yt-dlp/issues/15120)) by [seproDev](https://github.com/seproDev)
|
||||
- **web.archive**: youtube: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7ec6b9bc40ee8a21b11cce83a09a07a37014062e) ([#15234](https://github.com/yt-dlp/yt-dlp/issues/15234)) by [seproDev](https://github.com/seproDev)
|
||||
- **wistiachannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0c696239ef418776ac6ba20284bd2f3976a011b4) ([#14218](https://github.com/yt-dlp/yt-dlp/issues/14218)) by [Sojiroh](https://github.com/Sojiroh)
|
||||
- **xhamster**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/29e257037862f3b2ad65e6e8d2972f9ed89389e3) ([#15252](https://github.com/yt-dlp/yt-dlp/issues/15252)) by [0xvd](https://github.com/0xvd)
|
||||
- **yfanefa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/af285016d2b14c4445109283e7c590b31542de88) ([#15032](https://github.com/yt-dlp/yt-dlp/issues/15032)) by [Haytam001](https://github.com/Haytam001)
|
||||
- **youtube**
|
||||
- [Add `use_ad_playback_context` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/f7acf3c1f42cc474927ecc452205d7877af36731) ([#15220](https://github.com/yt-dlp/yt-dlp/issues/15220)) by [WhatAmISupposedToPutHere](https://github.com/WhatAmISupposedToPutHere)
|
||||
- [Allow `ejs` patch version to differ](https://github.com/yt-dlp/yt-dlp/commit/7bd79d92965fe9f84d7e1720eb6bb10fa9a10c77) ([#15263](https://github.com/yt-dlp/yt-dlp/issues/15263)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Detect "super resolution" AI-upscaled formats](https://github.com/yt-dlp/yt-dlp/commit/4cb5e191efeebc3679f89c3c8ac819bcd511bb1f) ([#15050](https://github.com/yt-dlp/yt-dlp/issues/15050)) by [bashonly](https://github.com/bashonly)
|
||||
- [Determine wait time from player response](https://github.com/yt-dlp/yt-dlp/commit/715af0c636b2b33fb3df1eb2ee37eac8262d43ac) ([#14646](https://github.com/yt-dlp/yt-dlp/issues/14646)) by [bashonly](https://github.com/bashonly), [WhatAmISupposedToPutHere](https://github.com/WhatAmISupposedToPutHere)
|
||||
- [Extract all automatic caption languages](https://github.com/yt-dlp/yt-dlp/commit/419776ecf57269efb13095386a19ddc75c1f11b2) ([#15156](https://github.com/yt-dlp/yt-dlp/issues/15156)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve message when no JS runtime is found](https://github.com/yt-dlp/yt-dlp/commit/1d43fa5af883f96af902a29544fc766f5e97fce6) ([#15266](https://github.com/yt-dlp/yt-dlp/issues/15266)) by [bashonly](https://github.com/bashonly)
|
||||
- [Update ejs to 0.3.2](https://github.com/yt-dlp/yt-dlp/commit/0c7e4cfcaed95909d7c1c0a11b5a12881bcfdfd6) ([#15267](https://github.com/yt-dlp/yt-dlp/issues/15267)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Downloader changes
|
||||
- [Fix playback wait time for ffmpeg downloads](https://github.com/yt-dlp/yt-dlp/commit/23f1ab346927ab73ad510fd7ba105a69e5291c66) ([#15066](https://github.com/yt-dlp/yt-dlp/issues/15066)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Postprocessor changes
|
||||
- **ffmpeg**: [Fix uncaught error if bad --ffmpeg-location is given](https://github.com/yt-dlp/yt-dlp/commit/0eed3fe530d6ff4b668494c5b1d4d6fc1ade96f7) ([#15104](https://github.com/yt-dlp/yt-dlp/issues/15104)) by [bashonly](https://github.com/bashonly)
|
||||
- **ffmpegmetadata**: [Add more tag mappings](https://github.com/yt-dlp/yt-dlp/commit/04050be583aae21f99932a674d1d2992ff016d5c) ([#14654](https://github.com/yt-dlp/yt-dlp/issues/14654)) by [garret1317](https://github.com/garret1317)
|
||||
|
||||
#### Networking changes
|
||||
- **Request Handler**: urllib: [Do not read after close](https://github.com/yt-dlp/yt-dlp/commit/6ee6a6fc58d6254ef944bd311e6890e208a75e98) ([#15049](https://github.com/yt-dlp/yt-dlp/issues/15049)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**: [Bump PyInstaller minimum version requirement to 6.17.0](https://github.com/yt-dlp/yt-dlp/commit/280165026886a1f1614ab527c34c66d71faa5d69) ([#15199](https://github.com/yt-dlp/yt-dlp/issues/15199)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [7a52ff2](https://github.com/yt-dlp/yt-dlp/commit/7a52ff29d86efc8f3adeba977b2009ce40b8e52e) by [bashonly](https://github.com/bashonly), [oxyzenQ](https://github.com/oxyzenQ), [RezSat](https://github.com/RezSat), [seproDev](https://github.com/seproDev)
|
||||
- **devscripts**: `install_deps`: [Align options/terms with PEP 735](https://github.com/yt-dlp/yt-dlp/commit/29fe515d8d3386b3406ff02bdabb967d6821bc02) ([#15200](https://github.com/yt-dlp/yt-dlp/issues/15200)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.11.12
|
||||
|
||||
#### Important changes
|
||||
- **An external JavaScript runtime is now required for full YouTube support**
|
||||
yt-dlp now requires users to have an external JavaScript runtime (e.g. Deno) installed in order to solve the JavaScript challenges presented by YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/15012)
|
||||
|
||||
#### Core changes
|
||||
- **cookies**
|
||||
- [Allow `--cookies-from-browser` for Safari on iOS](https://github.com/yt-dlp/yt-dlp/commit/e6414d64e73d86d65bb357e5ad59d0ca080d5812) ([#14950](https://github.com/yt-dlp/yt-dlp/issues/14950)) by [pha1n0q](https://github.com/pha1n0q)
|
||||
- [Support Firefox cookies database v17](https://github.com/yt-dlp/yt-dlp/commit/bf7e04e9d8bd3c4a4614b67ce617b7ae5d17d62a) ([#15010](https://github.com/yt-dlp/yt-dlp/issues/15010)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **sponsorblock**: [Add `hook` category](https://github.com/yt-dlp/yt-dlp/commit/52f3c56e83bbb25eec2496b0499768753732a093) ([#14845](https://github.com/yt-dlp/yt-dlp/issues/14845)) by [seproDev](https://github.com/seproDev)
|
||||
- **update**: [Fix PyInstaller onedir variant detection](https://github.com/yt-dlp/yt-dlp/commit/1c2ad94353d1c9e03615d20b6bbfc293286c7a32) ([#14800](https://github.com/yt-dlp/yt-dlp/issues/14800)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Extractor changes
|
||||
- **1tv**: live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/19c5d7c53013440ec4f3f56ebbb067531b272f3f) ([#14299](https://github.com/yt-dlp/yt-dlp/issues/14299)) by [swayll](https://github.com/swayll)
|
||||
- **ardaudiothek**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0046fbcbfceee32fa2f68a8ea00cca02765470b6) ([#14309](https://github.com/yt-dlp/yt-dlp/issues/14309)) by [evilpie](https://github.com/evilpie), [marieell](https://github.com/marieell)
|
||||
- **bunnycdn**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/228ae9f0f2b441fa1296db2ed2b7afbd4a9a62a1) ([#14954](https://github.com/yt-dlp/yt-dlp/issues/14954)) by [seproDev](https://github.com/seproDev)
|
||||
- [Support player subdomain URLs](https://github.com/yt-dlp/yt-dlp/commit/3ef867451cd9604b4195dfee00db768619629b2d) ([#14979](https://github.com/yt-dlp/yt-dlp/issues/14979)) by [einstein95](https://github.com/einstein95)
|
||||
- **discoverynetworksde**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/10dea209d2460daf924c93835ddc2f0301cf2cd4) ([#14818](https://github.com/yt-dlp/yt-dlp/issues/14818)) by [dirkf](https://github.com/dirkf), [w4grfw](https://github.com/w4grfw) (With fixes in [f3c255b](https://github.com/yt-dlp/yt-dlp/commit/f3c255b63bd26069151fc3d3ba6dc626bb62ad6e) by [bashonly](https://github.com/bashonly))
|
||||
- **floatplane**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1ac7e6005cd3be9fff0b28be189c3a68ecd4c593) ([#14984](https://github.com/yt-dlp/yt-dlp/issues/14984)) by [i3p9](https://github.com/i3p9)
|
||||
- **googledrive**
|
||||
- [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/6d05cee4df30774ddce5c5c751fd2118f40c24fe) ([#14809](https://github.com/yt-dlp/yt-dlp/issues/14809)) by [seproDev](https://github.com/seproDev)
|
||||
- [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/70f1098312fe53bc85358f7bd624370878b2fa28) ([#14746](https://github.com/yt-dlp/yt-dlp/issues/14746)) by [seproDev](https://github.com/seproDev)
|
||||
- **kika**: [Do not extract non-existent subtitles](https://github.com/yt-dlp/yt-dlp/commit/79f9232ffbd57dde91c372b673b42801edaa9e53) ([#14813](https://github.com/yt-dlp/yt-dlp/issues/14813)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
|
||||
- **mux**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a0bda3b78609593ce1127215fc035c1a308a89b6) ([#14914](https://github.com/yt-dlp/yt-dlp/issues/14914)) by [PierceLBrooks](https://github.com/PierceLBrooks), [seproDev](https://github.com/seproDev)
|
||||
- **nascarclassics**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e8a6b1ca92f2a0ce2c187668165be23dc5506aab) ([#14866](https://github.com/yt-dlp/yt-dlp/issues/14866)) by [JrM2628](https://github.com/JrM2628)
|
||||
- **nbc**: [Detect and discard DRM formats](https://github.com/yt-dlp/yt-dlp/commit/ee3a106f34124f0e2d28f062f5302863fd7639be) ([#14844](https://github.com/yt-dlp/yt-dlp/issues/14844)) by [bashonly](https://github.com/bashonly)
|
||||
- **ntv.ru**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/5dde0d0c9fcef2ce57e486b2e563e0dff9b2845a) ([#14934](https://github.com/yt-dlp/yt-dlp/issues/14934)) by [anlar](https://github.com/anlar), [seproDev](https://github.com/seproDev) (With fixes in [a86eeaa](https://github.com/yt-dlp/yt-dlp/commit/a86eeaadf236ceaf6bb232eb410cf21572538aa6) by [seproDev](https://github.com/seproDev))
|
||||
- **play.tv**: [Update extractor for new domain](https://github.com/yt-dlp/yt-dlp/commit/73fd850d170e01c47c31aaa6aa8fe90856d9ad18) ([#14905](https://github.com/yt-dlp/yt-dlp/issues/14905)) by [thomasmllt](https://github.com/thomasmllt)
|
||||
- **tubetugraz**: [Support alternate URL format](https://github.com/yt-dlp/yt-dlp/commit/f3597cfafcab4d7d4c6d41bff3647681301f1e6b) ([#14718](https://github.com/yt-dlp/yt-dlp/issues/14718)) by [krystophny](https://github.com/krystophny)
|
||||
- **twitch**
|
||||
- [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/cb78440e468608fd55546280b537387d375335f2) ([#15008](https://github.com/yt-dlp/yt-dlp/issues/15008)) by [bashonly](https://github.com/bashonly), [ctengel](https://github.com/ctengel)
|
||||
- stream: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7eff676183518175ce495ae63291c89f9b39f02a) ([#14988](https://github.com/yt-dlp/yt-dlp/issues/14988)) by [seproDev](https://github.com/seproDev)
|
||||
- vod: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b46c572b26be15683584102c5fb7e7bfde0c9821) ([#14999](https://github.com/yt-dlp/yt-dlp/issues/14999)) by [Zer0spectrum](https://github.com/Zer0spectrum)
|
||||
- **urplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/808b1fed76fbd07840cc23a346c11334e3d34f43) ([#14785](https://github.com/yt-dlp/yt-dlp/issues/14785)) by [seproDev](https://github.com/seproDev)
|
||||
- **web.archive**: youtube: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d9e3011fd1c3a75871a50e78533afe78ad427ce3) ([#14753](https://github.com/yt-dlp/yt-dlp/issues/14753)) by [seproDev](https://github.com/seproDev)
|
||||
- **xhamster**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a1d6351c3fc82c07fa0ee70811ed84807f6bbb58) ([#14948](https://github.com/yt-dlp/yt-dlp/issues/14948)) by [CaramelConnoisseur](https://github.com/CaramelConnoisseur), [dhwz](https://github.com/dhwz)
|
||||
- **youtube**
|
||||
- [Add `tv_downgraded` client](https://github.com/yt-dlp/yt-dlp/commit/61cf34f5447177a73ba25ea9a47d7df516ca3b3b) ([#14887](https://github.com/yt-dlp/yt-dlp/issues/14887)) by [seproDev](https://github.com/seproDev) (With fixes in [fa35eb2](https://github.com/yt-dlp/yt-dlp/commit/fa35eb27eaf27df7b5854f527a89fc828c9e0ec0))
|
||||
- [Fix `web_embedded` client extraction](https://github.com/yt-dlp/yt-dlp/commit/d6ee67725397807bbb5edcd0b2c94f5bca62d3f4) ([#14843](https://github.com/yt-dlp/yt-dlp/issues/14843)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- [Fix auto-generated metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/a56217f9f6c594f6c419ce8dce9134198a9d90d0) ([#13896](https://github.com/yt-dlp/yt-dlp/issues/13896)) by [TheQWERTYCodr](https://github.com/TheQWERTYCodr)
|
||||
- [Fix original language detection](https://github.com/yt-dlp/yt-dlp/commit/afc44022d0b736b2b3e87b52490bd35c53c53632) ([#14919](https://github.com/yt-dlp/yt-dlp/issues/14919)) by [bashonly](https://github.com/bashonly)
|
||||
- [Implement external n/sig solver](https://github.com/yt-dlp/yt-dlp/commit/6224a3898821965a7d6a2cb9cc2de40a0fd6e6bc) ([#14157](https://github.com/yt-dlp/yt-dlp/issues/14157)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) (With fixes in [4b4223b](https://github.com/yt-dlp/yt-dlp/commit/4b4223b436fb03a12628679daed32ae4fc15ae4b), [ee98be4](https://github.com/yt-dlp/yt-dlp/commit/ee98be4ad767b77e4d8dd9bfd3c7d10f2e8397ff), [c0c9f30](https://github.com/yt-dlp/yt-dlp/commit/c0c9f30695db314df084e8701a7c376eb54f283c), [cacd163](https://github.com/yt-dlp/yt-dlp/commit/cacd1630a1a59e92f857d0d175c8730cffbf9801), [8636a9b](https://github.com/yt-dlp/yt-dlp/commit/8636a9bac3bed99984c1e297453660468ecf504b))
|
||||
- [Support collaborators](https://github.com/yt-dlp/yt-dlp/commit/f87cfadb5c3cba8e9dc4231c9554548e9edb3882) ([#14677](https://github.com/yt-dlp/yt-dlp/issues/14677)) by [seproDev](https://github.com/seproDev)
|
||||
- tab: [Fix duration extraction for feeds](https://github.com/yt-dlp/yt-dlp/commit/1d2f0edaf978a5541cfb8f7e83fec433c65c1011) ([#14668](https://github.com/yt-dlp/yt-dlp/issues/14668)) by [WeidiDeng](https://github.com/WeidiDeng)
|
||||
|
||||
#### Downloader changes
|
||||
- **ffmpeg**
|
||||
- [Apply `ffmpeg_args` for each format](https://github.com/yt-dlp/yt-dlp/commit/ffb7b7f446b6c67a28c66598ae91f4f2263e0d75) ([#14886](https://github.com/yt-dlp/yt-dlp/issues/14886)) by [bashonly](https://github.com/bashonly)
|
||||
- [Limit read rate for DASH livestreams](https://github.com/yt-dlp/yt-dlp/commit/7af6d81f35aea8832023daa30ada10e6673a0529) ([#14918](https://github.com/yt-dlp/yt-dlp/issues/14918)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Networking changes
|
||||
- [Ensure underlying file object is closed when fully read](https://github.com/yt-dlp/yt-dlp/commit/5767fb4ab108dddb07fc839a3b0f4d323a7c4bea) ([#14935](https://github.com/yt-dlp/yt-dlp/issues/14935)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Misc. changes
|
||||
- [Fix zsh path argument completion](https://github.com/yt-dlp/yt-dlp/commit/c96e9291ab7bd6e7da66d33424982c8b0b4431c7) ([#14953](https://github.com/yt-dlp/yt-dlp/issues/14953)) by [matyb08](https://github.com/matyb08)
|
||||
- **build**: [Bump musllinux Python version to 3.14](https://github.com/yt-dlp/yt-dlp/commit/646904cd3a79429ec5fdc43f904b3f57ae213f34) ([#14623](https://github.com/yt-dlp/yt-dlp/issues/14623)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**
|
||||
- Miscellaneous
|
||||
- [c63b4e2](https://github.com/yt-dlp/yt-dlp/commit/c63b4e2a2b81cc78397c8709ef53ffd29bada213) by [bashonly](https://github.com/bashonly), [matyb08](https://github.com/matyb08), [seproDev](https://github.com/seproDev)
|
||||
- [335653b](https://github.com/yt-dlp/yt-dlp/commit/335653be82d5ef999cfc2879d005397402eebec1) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- **devscripts**: [Improve `install_deps` script](https://github.com/yt-dlp/yt-dlp/commit/73922e66e437fb4bb618bdc119a96375081bf508) ([#14766](https://github.com/yt-dlp/yt-dlp/issues/14766)) by [bashonly](https://github.com/bashonly)
|
||||
- **test**: [Skip flaky tests if source unchanged](https://github.com/yt-dlp/yt-dlp/commit/ade8c2b36ff300edef87d48fd1ba835ac35c5b63) ([#14970](https://github.com/yt-dlp/yt-dlp/issues/14970)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
|
||||
### 2025.10.22
|
||||
|
||||
#### Important changes
|
||||
|
||||
@@ -8,7 +8,7 @@ ## Core Maintainers
|
||||
|
||||
Core Maintainers are responsible for reviewing and merging contributions, publishing releases, and steering the overall direction of the project.
|
||||
|
||||
**You can contact the core maintainers via `maintainers@yt-dlp.org`.**
|
||||
**You can contact the core maintainers via `maintainers@yt-dlp.org`.** This email address is **NOT** a support channel. [Open an issue](https://github.com/yt-dlp/yt-dlp/issues/new/choose) if you need help or want to report a bug.
|
||||
|
||||
### [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
@@ -16,6 +16,7 @@ ### [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
* Overhauled the networking stack and implemented support for `requests` and `curl_cffi` (`--impersonate`) HTTP clients
|
||||
* Reworked the plugin architecture to support installing plugins across all yt-dlp distributions (exe, pip, etc.)
|
||||
* Implemented support for external JavaScript runtimes/engines
|
||||
* Maintains support for YouTube
|
||||
* Added and fixed support for various other sites
|
||||
|
||||
@@ -23,9 +24,10 @@ ### [bashonly](https://github.com/bashonly)
|
||||
|
||||
* Rewrote and maintains the build/release workflows and the self-updater: executables, automated/nightly/master releases, `--update-to`
|
||||
* Overhauled external downloader cookie handling
|
||||
* Helped in implementing support for external JavaScript runtimes/engines
|
||||
* Added `--cookies-from-browser` support for Firefox containers
|
||||
* Overhauled and maintains support for sites like Youtube, Vimeo, Twitter, TikTok, etc
|
||||
* Added support for sites like Dacast, Kick, Loom, SproutVideo, Triller, Weverse, etc
|
||||
* Maintains support for sites like YouTube, Vimeo, Twitter, TikTok, etc
|
||||
* Added support for various sites
|
||||
|
||||
|
||||
### [Grub4K](https://github.com/Grub4K)
|
||||
@@ -35,12 +37,14 @@ ### [Grub4K](https://github.com/Grub4K)
|
||||
* `--update-to`, self-updater rewrite, automated/nightly/master releases
|
||||
* Reworked internals like `traverse_obj`, various core refactors and bug fixes
|
||||
* Implemented proper progress reporting for parallel downloads
|
||||
* Implemented support for external JavaScript runtimes/engines
|
||||
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
|
||||
|
||||
|
||||
### [sepro](https://github.com/seproDev)
|
||||
|
||||
* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
|
||||
* Helped in implementing support for external JavaScript runtimes/engines
|
||||
* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
|
||||
* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc
|
||||
|
||||
|
||||
103
Makefile
103
Makefile
@@ -1,4 +1,5 @@
|
||||
all: lazy-extractors yt-dlp doc pypi-files
|
||||
all-extra: lazy-extractors yt-dlp-extra doc pypi-files
|
||||
clean: clean-test clean-dist
|
||||
clean-all: clean clean-cache
|
||||
completions: completion-bash completion-fish completion-zsh
|
||||
@@ -15,7 +16,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
|
||||
.PHONY: all clean clean-all clean-test clean-dist clean-cache \
|
||||
completions completion-bash completion-fish completion-zsh \
|
||||
doc issuetemplates supportedsites ot offlinetest codetest test \
|
||||
tar pypi-files lazy-extractors install uninstall
|
||||
tar pypi-files lazy-extractors install uninstall \
|
||||
all-extra yt-dlp-extra current-ejs-version
|
||||
|
||||
.IGNORE: current-ejs-version
|
||||
.SILENT: current-ejs-version
|
||||
|
||||
clean-test:
|
||||
rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
@@ -25,7 +30,8 @@ clean-test:
|
||||
test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."*
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS \
|
||||
yt-dlp.zip .ejs-* yt_dlp_ejs/
|
||||
clean-cache:
|
||||
find . \( \
|
||||
-type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
|
||||
@@ -81,28 +87,49 @@ test:
|
||||
offlinetest: codetest
|
||||
$(PYTHON) -m pytest -Werror -m "not download"
|
||||
|
||||
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
|
||||
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
|
||||
CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
|
||||
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
|
||||
CODE_FILES != $(CODE_FILES_CMD)
|
||||
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
|
||||
yt-dlp: $(CODE_FILES)
|
||||
PY_CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's|/__init__\.py||' | grep -v '/__' | sort
|
||||
PY_CODE_FOLDERS != $(PY_CODE_FOLDERS_CMD)
|
||||
PY_CODE_FOLDERS ?= $(shell $(PY_CODE_FOLDERS_CMD))
|
||||
|
||||
PY_CODE_FILES_CMD = for f in $(PY_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.py|' ; done
|
||||
PY_CODE_FILES != $(PY_CODE_FILES_CMD)
|
||||
PY_CODE_FILES ?= $(shell $(PY_CODE_FILES_CMD))
|
||||
|
||||
JS_CODE_FOLDERS_CMD = find yt_dlp -type f -name '*.js' | sed 's|/[^/]\{1,\}\.js$$||' | uniq
|
||||
JS_CODE_FOLDERS != $(JS_CODE_FOLDERS_CMD)
|
||||
JS_CODE_FOLDERS ?= $(shell $(JS_CODE_FOLDERS_CMD))
|
||||
|
||||
JS_CODE_FILES_CMD = for f in $(JS_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.js|' ; done
|
||||
JS_CODE_FILES != $(JS_CODE_FILES_CMD)
|
||||
JS_CODE_FILES ?= $(shell $(JS_CODE_FILES_CMD))
|
||||
|
||||
yt-dlp.zip: $(PY_CODE_FILES) $(JS_CODE_FILES)
|
||||
mkdir -p zip
|
||||
for d in $(CODE_FOLDERS) ; do \
|
||||
for d in $(PY_CODE_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
(cd zip && touch -t 200001010101 $(CODE_FILES))
|
||||
mv zip/yt_dlp/__main__.py zip/
|
||||
(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
|
||||
for d in $(JS_CODE_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.js zip/$$d/ ;\
|
||||
done
|
||||
(cd zip && touch -t 200001010101 $(PY_CODE_FILES) $(JS_CODE_FILES))
|
||||
rm -f zip/yt_dlp/__main__.py
|
||||
(cd zip && zip -q ../yt-dlp.zip $(PY_CODE_FILES) $(JS_CODE_FILES))
|
||||
rm -rf zip
|
||||
|
||||
yt-dlp: yt-dlp.zip
|
||||
mkdir -p zip
|
||||
cp -pP yt_dlp/__main__.py zip/
|
||||
touch -t 200001010101 zip/__main__.py
|
||||
(cd zip && zip -q ../yt-dlp.zip __main__.py)
|
||||
echo '#!$(PYTHON)' > yt-dlp
|
||||
cat yt-dlp.zip >> yt-dlp
|
||||
rm yt-dlp.zip
|
||||
chmod a+x yt-dlp
|
||||
rm -rf zip
|
||||
|
||||
README.md: $(CODE_FILES) devscripts/make_readme.py
|
||||
README.md: $(PY_CODE_FILES) devscripts/make_readme.py
|
||||
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md devscripts/make_contributing.py
|
||||
@@ -127,15 +154,15 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
|
||||
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
|
||||
rm -f yt-dlp.1.temp.md
|
||||
|
||||
completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
|
||||
completions/bash/yt-dlp: $(PY_CODE_FILES) devscripts/bash-completion.in
|
||||
mkdir -p completions/bash
|
||||
$(PYTHON) devscripts/bash-completion.py
|
||||
|
||||
completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
|
||||
completions/zsh/_yt-dlp: $(PY_CODE_FILES) devscripts/zsh-completion.in
|
||||
mkdir -p completions/zsh
|
||||
$(PYTHON) devscripts/zsh-completion.py
|
||||
|
||||
completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
|
||||
completions/fish/yt-dlp.fish: $(PY_CODE_FILES) devscripts/fish-completion.in
|
||||
mkdir -p completions/fish
|
||||
$(PYTHON) devscripts/fish-completion.py
|
||||
|
||||
@@ -172,3 +199,45 @@ CONTRIBUTORS: Changelog.md
|
||||
echo 'Updating $@ from git commit history' ; \
|
||||
$(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \
|
||||
fi
|
||||
|
||||
# The following EJS_-prefixed variables are auto-generated by devscripts/update_ejs.py
|
||||
# DO NOT EDIT!
|
||||
EJS_VERSION = 0.3.2
|
||||
EJS_WHEEL_NAME = yt_dlp_ejs-0.3.2-py3-none-any.whl
|
||||
EJS_WHEEL_HASH = sha256:f2dc6b3d1b909af1f13e021621b0af048056fca5fb07c4db6aa9bbb37a4f66a9
|
||||
EJS_PY_FOLDERS = yt_dlp_ejs yt_dlp_ejs/yt yt_dlp_ejs/yt/solver
|
||||
EJS_PY_FILES = yt_dlp_ejs/__init__.py yt_dlp_ejs/_version.py yt_dlp_ejs/yt/__init__.py yt_dlp_ejs/yt/solver/__init__.py
|
||||
EJS_JS_FOLDERS = yt_dlp_ejs/yt/solver
|
||||
EJS_JS_FILES = yt_dlp_ejs/yt/solver/core.min.js yt_dlp_ejs/yt/solver/lib.min.js
|
||||
|
||||
yt-dlp-extra: current-ejs-version .ejs-$(EJS_VERSION) $(EJS_PY_FILES) $(EJS_JS_FILES) yt-dlp.zip
|
||||
mkdir -p zip
|
||||
for d in $(EJS_PY_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
for d in $(EJS_JS_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.js zip/$$d/ ;\
|
||||
done
|
||||
(cd zip && touch -t 200001010101 $(EJS_PY_FILES) $(EJS_JS_FILES))
|
||||
(cd zip && zip -q ../yt-dlp.zip $(EJS_PY_FILES) $(EJS_JS_FILES))
|
||||
cp -pP yt_dlp/__main__.py zip/
|
||||
touch -t 200001010101 zip/__main__.py
|
||||
(cd zip && zip -q ../yt-dlp.zip __main__.py)
|
||||
echo '#!$(PYTHON)' > yt-dlp
|
||||
cat yt-dlp.zip >> yt-dlp
|
||||
rm yt-dlp.zip
|
||||
chmod a+x yt-dlp
|
||||
rm -rf zip
|
||||
|
||||
.ejs-$(EJS_VERSION):
|
||||
@echo Downloading yt-dlp-ejs
|
||||
@echo "yt-dlp-ejs==$(EJS_VERSION) --hash $(EJS_WHEEL_HASH)" > .ejs-requirements.txt
|
||||
$(PYTHON) -m pip download -d ./build --no-deps --require-hashes -r .ejs-requirements.txt
|
||||
unzip -o build/$(EJS_WHEEL_NAME) "yt_dlp_ejs/*"
|
||||
@touch .ejs-$(EJS_VERSION)
|
||||
|
||||
current-ejs-version:
|
||||
rm -rf .ejs-*
|
||||
touch .ejs-$$($(PYTHON) -c 'import sys; sys.path = [""]; from yt_dlp_ejs import version; print(version)' 2>/dev/null)
|
||||
|
||||
92
README.md
92
README.md
@@ -145,9 +145,11 @@ #### Licensing
|
||||
|
||||
Most notably, the PyInstaller-bundled executables include GPLv3+ licensed code, and as such the combined work is licensed under [GPLv3+](https://www.gnu.org/licenses/gpl-3.0.html).
|
||||
|
||||
See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for details.
|
||||
The zipimport Unix executable (`yt-dlp`) contains [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) licensed code from [`meriyah`](https://github.com/meriyah/meriyah) and [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) licensed code from [`astring`](https://github.com/davidbonnet/astring).
|
||||
|
||||
The zipimport binary (`yt-dlp`), the source tarball (`yt-dlp.tar.gz`), and the PyPI source distribution & wheel only contain code licensed under the [Unlicense](LICENSE).
|
||||
See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for more details.
|
||||
|
||||
The git repository, the source tarball (`yt-dlp.tar.gz`), the PyPI source distribution and the PyPI built distribution (wheel) only contain code licensed under the [Unlicense](LICENSE).
|
||||
|
||||
<!-- MANPAGE: END EXCLUDED SECTION -->
|
||||
|
||||
@@ -187,7 +189,7 @@ # To update to nightly from stable executable/binary:
|
||||
yt-dlp --update-to nightly
|
||||
|
||||
# To install nightly with pip:
|
||||
python3 -m pip install -U --pre "yt-dlp[default]"
|
||||
python -m pip install -U --pre "yt-dlp[default]"
|
||||
```
|
||||
|
||||
When running a yt-dlp version that is older than 90 days, you will see a warning message suggesting to update to the latest version.
|
||||
@@ -201,7 +203,7 @@ ## DEPENDENCIES
|
||||
On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
|
||||
-->
|
||||
|
||||
While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
|
||||
While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a supported JavaScript runtime/engine are highly recommended
|
||||
|
||||
### Strongly recommended
|
||||
|
||||
@@ -211,6 +213,10 @@ ### Strongly recommended
|
||||
|
||||
**Important**: What you need is ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)
|
||||
|
||||
* [**yt-dlp-ejs**](https://github.com/yt-dlp/ejs) - Required for deciphering YouTube n/sig values. Licensed under [Unlicense](https://github.com/yt-dlp/ejs/blob/main/LICENSE), bundles [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) and [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) components.
|
||||
|
||||
A JavaScript runtime/engine like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS).
|
||||
|
||||
### Networking
|
||||
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
|
||||
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
|
||||
@@ -222,7 +228,7 @@ #### Impersonation
|
||||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||
* Can be installed with the `curl-cffi` extra, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||
* Currently included in most builds *except* `yt-dlp` (Unix zipimport binary), `yt-dlp_x86` (Windows 32-bit) and `yt-dlp_musllinux_aarch64`
|
||||
|
||||
|
||||
@@ -235,7 +241,7 @@ ### Metadata
|
||||
### Misc
|
||||
|
||||
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
|
||||
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
|
||||
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in some extractors where JavaScript needs to be run. No longer used for YouTube. To be deprecated in the near future. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
|
||||
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
|
||||
* Any external downloader that you want to use with `--downloader`
|
||||
|
||||
@@ -259,12 +265,12 @@ ### Standalone PyInstaller Builds
|
||||
You can run the following commands:
|
||||
|
||||
```
|
||||
python3 devscripts/install_deps.py --include pyinstaller
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
python3 -m bundle.pyinstaller
|
||||
python devscripts/install_deps.py --include-extra pyinstaller
|
||||
python devscripts/make_lazy_extractors.py
|
||||
python -m bundle.pyinstaller
|
||||
```
|
||||
|
||||
On some systems, you may need to use `py` or `python` instead of `python3`.
|
||||
On some systems, you may need to use `py` or `python3` instead of `python`.
|
||||
|
||||
`python -m bundle.pyinstaller` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
|
||||
|
||||
@@ -354,7 +360,7 @@ ## General Options:
|
||||
containing directory ("-" for stdin). Can be
|
||||
used multiple times and inside other
|
||||
configuration files
|
||||
--plugin-dirs PATH Path to an additional directory to search
|
||||
--plugin-dirs DIR Path to an additional directory to search
|
||||
for plugins. This option can be used
|
||||
multiple times to add multiple directories.
|
||||
Use "default" to search the default plugin
|
||||
@@ -362,6 +368,37 @@ ## General Options:
|
||||
--no-plugin-dirs Clear plugin directories to search,
|
||||
including defaults and those provided by
|
||||
previous --plugin-dirs
|
||||
--js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
|
||||
with an optional location for the runtime
|
||||
(either the path to the binary or its
|
||||
containing directory). This option can be
|
||||
used multiple times to enable multiple
|
||||
runtimes. Supported runtimes are (in order
|
||||
of priority, from highest to lowest): deno,
|
||||
node, quickjs, bun. Only "deno" is enabled
|
||||
by default. The highest priority runtime
|
||||
that is both enabled and available will be
|
||||
used. In order to use a lower priority
|
||||
runtime when "deno" is available, --no-js-
|
||||
runtimes needs to be passed before enabling
|
||||
other runtimes
|
||||
--no-js-runtimes Clear JavaScript runtimes to enable,
|
||||
including defaults and those provided by
|
||||
previous --js-runtimes
|
||||
--remote-components COMPONENT Remote components to allow yt-dlp to fetch
|
||||
when required. This option is currently not
|
||||
needed if you are using an official
|
||||
executable or have the requisite version of
|
||||
the yt-dlp-ejs package installed. You can
|
||||
use this option multiple times to allow
|
||||
multiple components. Supported values:
|
||||
ejs:npm (external JavaScript components from
|
||||
npm), ejs:github (external JavaScript
|
||||
components from yt-dlp-ejs GitHub). By
|
||||
default, no remote components are allowed
|
||||
--no-remote-components Disallow fetching of all remote components,
|
||||
including any previously allowed by
|
||||
--remote-components or defaults.
|
||||
--flat-playlist Do not extract a playlist's URL result
|
||||
entries; some entry metadata may be missing
|
||||
and downloading may be bypassed
|
||||
@@ -446,7 +483,7 @@ ## Geo-restriction:
|
||||
two-letter ISO 3166-2 country code
|
||||
|
||||
## Video Selection:
|
||||
-I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items
|
||||
-I, --playlist-items ITEM_SPEC Comma-separated playlist_index of the items
|
||||
to download. You can specify a range using
|
||||
"[START]:[STOP][:STEP]". For backward
|
||||
compatibility, START-STOP is also supported.
|
||||
@@ -1079,11 +1116,12 @@ ## SponsorBlock Options:
|
||||
for, separated by commas. Available
|
||||
categories are sponsor, intro, outro,
|
||||
selfpromo, preview, filler, interaction,
|
||||
music_offtopic, poi_highlight, chapter, all
|
||||
and default (=all). You can prefix the
|
||||
category with a "-" to exclude it. See [1]
|
||||
for descriptions of the categories. E.g.
|
||||
--sponsorblock-mark all,-preview
|
||||
music_offtopic, hook, poi_highlight,
|
||||
chapter, all and default (=all). You can
|
||||
prefix the category with a "-" to exclude
|
||||
it. See [1] for descriptions of the
|
||||
categories. E.g. --sponsorblock-mark
|
||||
all,-preview
|
||||
[1] https://wiki.sponsor.ajay.app/w/Segment_Categories
|
||||
--sponsorblock-remove CATS SponsorBlock categories to be removed from
|
||||
the video file, separated by commas. If a
|
||||
@@ -1148,7 +1186,7 @@ # CONFIGURATION
|
||||
You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:
|
||||
|
||||
1. **Main Configuration**:
|
||||
* The file given to `--config-location`
|
||||
* The file given to `--config-locations`
|
||||
1. **Portable Configuration**: (Recommended for portable installations)
|
||||
* If using a binary, `yt-dlp.conf` in the same directory as the binary
|
||||
* If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp`
|
||||
@@ -1230,7 +1268,7 @@ ### Authentication with netrc
|
||||
|
||||
### Notes about environment variables
|
||||
* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but is always shown as `${VARIABLE}` in this documentation
|
||||
* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
|
||||
* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-locations`
|
||||
* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
|
||||
* On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise
|
||||
* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
|
||||
@@ -1261,7 +1299,7 @@ # OUTPUT TEMPLATE
|
||||
|
||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
|
||||
|
||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
|
||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma-separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
|
||||
|
||||
1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
|
||||
|
||||
@@ -1760,8 +1798,8 @@ # MODIFYING METADATA
|
||||
`track` | `track_number`
|
||||
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
|
||||
`composer` | `composer` or `composers`
|
||||
`genre` | `genre` or `genres`
|
||||
`album` | `album`
|
||||
`genre` | `genre`, `genres`, `categories` or `tags`
|
||||
`album` | `album` or `series`
|
||||
`album_artist` | `album_artist` or `album_artists`
|
||||
`disc` | `disc_number`
|
||||
`show` | `series`
|
||||
@@ -1814,7 +1852,7 @@ # EXTRACTOR ARGUMENTS
|
||||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `android_sdkless,tv,web_safari,web` is used. `android_sdkless` is omitted if cookies are passed. If premium cookies are passed, `tv,web_creator,web_safari,web` is used instead. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `tv,android_sdkless,web` is used. If no JavaScript runtime/engine is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web_safari,web` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
@@ -1829,10 +1867,14 @@ #### youtube
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
|
||||
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
|
||||
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
|
||||
* `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
|
||||
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
|
||||
* `use_ad_playback_context`: Skip preroll ads to eliminate the mandatory wait period before download. Do NOT use this when passing premium account cookies to yt-dlp, as it will result in a loss of premium formats. Only effective with the `web`, `web_safari`, `web_music` and `mweb` player clients. Either `true` or `false` (default)
|
||||
|
||||
#### youtube-ejs
|
||||
* `jitless`: Run supported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered insecure. Either `true` or `false` (default)
|
||||
|
||||
#### youtubepot-webpo
|
||||
* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`
|
||||
|
||||
@@ -4431,3 +4431,43 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
Meriyah | ISC
|
||||
URL: https://github.com/meriyah/meriyah
|
||||
--------------------------------------------------------------------------------
|
||||
ISC License
|
||||
|
||||
Copyright (c) 2019 and later, KFlash and others.
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
Astring | MIT
|
||||
URL: https://github.com/davidbonnet/astring/
|
||||
--------------------------------------------------------------------------------
|
||||
Copyright (c) 2015, David Bonnet <david@bonnet.cc>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
@@ -15,12 +15,12 @@ function venvpy {
|
||||
}
|
||||
|
||||
INCLUDES=(
|
||||
--include pyinstaller
|
||||
--include secretstorage
|
||||
--include-extra pyinstaller
|
||||
--include-extra secretstorage
|
||||
)
|
||||
|
||||
if [[ -z "${EXCLUDE_CURL_CFFI:-}" ]]; then
|
||||
INCLUDES+=(--include curl-cffi)
|
||||
INCLUDES+=(--include-extra curl-cffi)
|
||||
fi
|
||||
|
||||
runpy -m venv /yt-dlp-build-venv
|
||||
@@ -28,7 +28,7 @@ runpy -m venv /yt-dlp-build-venv
|
||||
source /yt-dlp-build-venv/bin/activate
|
||||
# Inside the venv we use venvpy instead of runpy
|
||||
venvpy -m ensurepip --upgrade --default-pip
|
||||
venvpy -m devscripts.install_deps -o --include build
|
||||
venvpy -m devscripts.install_deps --omit-default --include-extra build
|
||||
venvpy -m devscripts.install_deps "${INCLUDES[@]}"
|
||||
venvpy -m devscripts.make_lazy_extractors
|
||||
venvpy devscripts/update-version.py -c "${CHANNEL}" -r "${ORIGIN}" "${VERSION}"
|
||||
|
||||
@@ -308,5 +308,22 @@
|
||||
"action": "add",
|
||||
"when": "2c9091e355a7ba5d1edb69796ecdca48199b77fb",
|
||||
"short": "[priority] **A stopgap release with a *TEMPORARY partial* fix for YouTube support**\nSome formats may still be unavailable, especially if cookies are passed to yt-dlp. The ***NEXT*** release, expected very soon, **will require an external JS runtime (e.g. Deno)** in order for YouTube downloads to work properly. [Read more](https://github.com/yt-dlp/yt-dlp/issues/14404)"
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "8636a9bac3bed99984c1e297453660468ecf504b",
|
||||
"short": "Fix 6224a3898821965a7d6a2cb9cc2de40a0fd6e6bc",
|
||||
"authors": ["Grub4K"]
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "6224a3898821965a7d6a2cb9cc2de40a0fd6e6bc",
|
||||
"short": "[priority] **An external JavaScript runtime is now required for full YouTube support**\nyt-dlp now requires users to have an external JavaScript runtime (e.g. Deno) installed in order to solve the JavaScript challenges presented by YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/15012)"
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "c63b4e2a2b81cc78397c8709ef53ffd29bada213",
|
||||
"short": "[cleanup] Misc (#14767)",
|
||||
"authors": ["bashonly", "seproDev", "matyb08"]
|
||||
}
|
||||
]
|
||||
|
||||
@@ -271,6 +271,19 @@ class Dependency:
|
||||
license_url='https://raw.githubusercontent.com/python-websockets/websockets/refs/heads/main/LICENSE',
|
||||
project_url='https://websockets.readthedocs.io/',
|
||||
),
|
||||
# Dependencies of yt-dlp-ejs
|
||||
Dependency(
|
||||
name='Meriyah',
|
||||
license='ISC',
|
||||
license_url='https://raw.githubusercontent.com/meriyah/meriyah/refs/heads/main/LICENSE.md',
|
||||
project_url='https://github.com/meriyah/meriyah',
|
||||
),
|
||||
Dependency(
|
||||
name='Astring',
|
||||
license='MIT',
|
||||
license_url='https://raw.githubusercontent.com/davidbonnet/astring/refs/heads/main/LICENSE',
|
||||
project_url='https://github.com/davidbonnet/astring/',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -22,14 +22,19 @@ def parse_args():
|
||||
'input', nargs='?', metavar='TOMLFILE', default=Path(__file__).parent.parent / 'pyproject.toml',
|
||||
help='input file (default: %(default)s)')
|
||||
parser.add_argument(
|
||||
'-e', '--exclude', metavar='DEPENDENCY', action='append',
|
||||
help='exclude a dependency')
|
||||
'-e', '--exclude-dependency', metavar='DEPENDENCY', action='append',
|
||||
help='exclude a dependency (can be used multiple times)')
|
||||
parser.add_argument(
|
||||
'-i', '--include', metavar='GROUP', action='append',
|
||||
help='include an optional dependency group')
|
||||
'-i', '--include-extra', metavar='EXTRA', action='append',
|
||||
help='include an extra/optional-dependencies list (can be used multiple times)')
|
||||
parser.add_argument(
|
||||
'-o', '--only-optional', action='store_true',
|
||||
help='only install optional dependencies')
|
||||
'-c', '--cherry-pick', metavar='DEPENDENCY', action='append',
|
||||
help=(
|
||||
'only include a specific dependency from the resulting dependency list '
|
||||
'(can be used multiple times)'))
|
||||
parser.add_argument(
|
||||
'-o', '--omit-default', action='store_true',
|
||||
help='omit the "default" extra unless it is explicitly included (it is included by default)')
|
||||
parser.add_argument(
|
||||
'-p', '--print', action='store_true',
|
||||
help='only print requirements to stdout')
|
||||
@@ -39,30 +44,41 @@ def parse_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def uniq(arg) -> dict[str, None]:
|
||||
return dict.fromkeys(map(str.lower, arg or ()))
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
project_table = parse_toml(read_file(args.input))['project']
|
||||
recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
|
||||
optional_groups = project_table['optional-dependencies']
|
||||
excludes = args.exclude or []
|
||||
recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<extra_name>[\w-]+)\]')
|
||||
extras = project_table['optional-dependencies']
|
||||
|
||||
def yield_deps(group):
|
||||
for dep in group:
|
||||
excludes = uniq(args.exclude_dependency)
|
||||
only_includes = uniq(args.cherry_pick)
|
||||
include_extras = uniq(args.include_extra)
|
||||
|
||||
def yield_deps(extra):
|
||||
for dep in extra:
|
||||
if mobj := recursive_pattern.fullmatch(dep):
|
||||
yield from optional_groups.get(mobj.group('group_name'), [])
|
||||
yield from extras.get(mobj.group('extra_name'), ())
|
||||
else:
|
||||
yield dep
|
||||
|
||||
targets = []
|
||||
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
|
||||
targets.extend(project_table['dependencies'])
|
||||
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
|
||||
targets.extend(yield_deps(optional_groups['default']))
|
||||
targets = {}
|
||||
if not args.omit_default:
|
||||
# legacy: 'dependencies' is empty now
|
||||
targets.update(dict.fromkeys(project_table['dependencies']))
|
||||
targets.update(dict.fromkeys(yield_deps(extras['default'])))
|
||||
|
||||
for include in filter(None, map(optional_groups.get, args.include or [])):
|
||||
targets.extend(yield_deps(include))
|
||||
for include in filter(None, map(extras.get, include_extras)):
|
||||
targets.update(dict.fromkeys(yield_deps(include)))
|
||||
|
||||
targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]
|
||||
def target_filter(target):
|
||||
name = re.match(r'[\w-]+', target).group(0).lower()
|
||||
return name not in excludes and (not only_includes or name in only_includes)
|
||||
|
||||
targets = list(filter(target_filter, targets))
|
||||
|
||||
if args.print:
|
||||
for target in targets:
|
||||
|
||||
@@ -251,7 +251,13 @@ class CommitRange:
|
||||
''', re.VERBOSE | re.DOTALL)
|
||||
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
|
||||
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
|
||||
FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
|
||||
FIXES_RE = re.compile(r'''
|
||||
(?i:
|
||||
(?:bug\s*)?fix(?:es)?(?:
|
||||
\s+(?:bugs?|regression(?:\s+introduced)?)
|
||||
)?(?:\s+(?:in|for|from|by))?
|
||||
|Improve
|
||||
)\s+([\da-f]{40})''', re.VERBOSE)
|
||||
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
|
||||
|
||||
def __init__(self, start, end, default_author=None):
|
||||
@@ -353,6 +359,13 @@ def apply_overrides(self, overrides):
|
||||
continue
|
||||
commit = Commit(override_hash, override['short'], override.get('authors') or [])
|
||||
logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
|
||||
if match := self.FIXES_RE.search(commit.short):
|
||||
fix_commitish = match.group(1)
|
||||
if fix_commitish in self._commits:
|
||||
del self._commits[commit.hash]
|
||||
self._fixes[fix_commitish].append(commit)
|
||||
logger.info(f'Found fix for {fix_commitish[:HASH_LENGTH]}: {commit.hash[:HASH_LENGTH]}')
|
||||
continue
|
||||
self._commits[commit.hash] = commit
|
||||
|
||||
self._commits = dict(reversed(self._commits.items()))
|
||||
|
||||
@@ -17,6 +17,18 @@ def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
|
||||
parser.add_argument(
|
||||
'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*')
|
||||
parser.add_argument(
|
||||
'--flaky',
|
||||
action='store_true',
|
||||
default=None,
|
||||
help='Allow running flaky tests. (default: run, unless in CI)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--no-flaky',
|
||||
action='store_false',
|
||||
dest='flaky',
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
parser.add_argument(
|
||||
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
|
||||
parser.add_argument(
|
||||
@@ -24,10 +36,11 @@ def parse_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def run_tests(*tests, pattern=None, ci=False):
|
||||
def run_tests(*tests, pattern=None, ci=False, flaky: bool | None = None):
|
||||
# XXX: hatch uses `tests` if no arguments are passed
|
||||
run_core = 'core' in tests or 'tests' in tests or (not pattern and not tests)
|
||||
run_download = 'download' in tests
|
||||
run_flaky = flaky or (flaky is None and not ci)
|
||||
|
||||
pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
|
||||
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
|
||||
@@ -44,6 +57,8 @@ def run_tests(*tests, pattern=None, ci=False):
|
||||
test if '/' in test
|
||||
else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
|
||||
for test in tests)
|
||||
if not run_flaky:
|
||||
arguments.append('--disallow-flaky')
|
||||
|
||||
print(f'Running {arguments}', flush=True)
|
||||
try:
|
||||
@@ -72,6 +87,11 @@ def run_tests(*tests, pattern=None, ci=False):
|
||||
args = parse_args()
|
||||
|
||||
os.chdir(Path(__file__).parent.parent)
|
||||
sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI'))))
|
||||
sys.exit(run_tests(
|
||||
*args.test,
|
||||
pattern=args.k,
|
||||
ci=bool(os.getenv('CI')),
|
||||
flaky=args.flaky,
|
||||
))
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
166
devscripts/update_ejs.py
Executable file
166
devscripts/update_ejs.py
Executable file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import json
|
||||
import hashlib
|
||||
import pathlib
|
||||
import urllib.request
|
||||
import zipfile
|
||||
|
||||
|
||||
TEMPLATE = '''\
|
||||
# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!
|
||||
|
||||
VERSION = {version!r}
|
||||
HASHES = {{
|
||||
{hash_mapping}
|
||||
}}
|
||||
'''
|
||||
PREFIX = ' "yt-dlp-ejs=='
|
||||
BASE_PATH = pathlib.Path(__file__).parent.parent
|
||||
PYPROJECT_PATH = BASE_PATH / 'pyproject.toml'
|
||||
PACKAGE_PATH = BASE_PATH / 'yt_dlp/extractor/youtube/jsc/_builtin/vendor'
|
||||
RELEASE_URL = 'https://api.github.com/repos/yt-dlp/ejs/releases/latest'
|
||||
ASSETS = {
|
||||
'yt.solver.lib.js': False,
|
||||
'yt.solver.lib.min.js': False,
|
||||
'yt.solver.deno.lib.js': True,
|
||||
'yt.solver.bun.lib.js': True,
|
||||
'yt.solver.core.min.js': False,
|
||||
'yt.solver.core.js': True,
|
||||
}
|
||||
MAKEFILE_PATH = BASE_PATH / 'Makefile'
|
||||
|
||||
|
||||
def request(url: str):
    """Open *url* and return a context manager yielding the HTTP response.

    Wrapped in ``contextlib.closing`` so callers can reliably use ``with``.
    """
    response = urllib.request.urlopen(url)
    return contextlib.closing(response)
|
||||
|
||||
|
||||
def makefile_variables(
    version: str | None = None,
    name: str | None = None,
    digest: str | None = None,
    data: bytes | None = None,
    keys_only: bool = False,
) -> dict[str, str | None]:
    """Build the mapping of ``EJS_*`` Makefile variable names to values.

    With ``keys_only=True``, every value is ``None`` (used to enumerate the
    variable names); otherwise all arguments must be provided and the wheel
    contents are listed for the file/folder variables.
    """
    assert keys_only or all(arg is not None for arg in (version, name, digest, data))

    if keys_only:
        # Only the key set is wanted; skip the (expensive) wheel listings.
        return dict.fromkeys((
            'EJS_VERSION',
            'EJS_WHEEL_NAME',
            'EJS_WHEEL_HASH',
            'EJS_PY_FOLDERS',
            'EJS_PY_FILES',
            'EJS_JS_FOLDERS',
            'EJS_JS_FILES',
        ))

    return {
        'EJS_VERSION': version,
        'EJS_WHEEL_NAME': name,
        'EJS_WHEEL_HASH': digest,
        'EJS_PY_FOLDERS': list_wheel_contents(data, 'py', files=False),
        'EJS_PY_FILES': list_wheel_contents(data, 'py', folders=False),
        'EJS_JS_FOLDERS': list_wheel_contents(data, 'js', files=False),
        'EJS_JS_FILES': list_wheel_contents(data, 'js', folders=False),
    }
|
||||
|
||||
|
||||
def list_wheel_contents(
    wheel_data: bytes,
    suffix: str | None = None,
    folders: bool = True,
    files: bool = True,
) -> str:
    """Return a space-separated listing of ``yt_dlp_ejs/`` entries in a wheel.

    Only archive members under ``yt_dlp_ejs/`` (optionally restricted to
    ``.{suffix}`` files) are considered. Depending on the flags, the result
    contains the unique parent folders, the file paths, or folders followed
    by files. Order follows the archive's member order.
    """
    assert folders or files, 'at least one of "folders" or "files" must be True'

    with zipfile.ZipFile(io.BytesIO(wheel_data)) as archive:
        member_names = [info.filename for info in archive.infolist()]

    matched = [
        name for name in member_names
        if name.startswith('yt_dlp_ejs/') and (not suffix or name.endswith(f'.{suffix}'))
    ]
    if not folders:
        return ' '.join(matched)

    # dict.fromkeys de-duplicates parent folders while preserving order
    parent_dirs = list(dict.fromkeys(name.rpartition('/')[0] for name in matched))
    if not files:
        return ' '.join(parent_dirs)

    return ' '.join(parent_dirs + matched)
|
||||
|
||||
|
||||
def main():
    """Update the vendored yt-dlp-ejs release artifacts.

    Steps: read the pinned version from pyproject.toml, read the current
    ``EJS_*`` values from the Makefile, query the latest GitHub release,
    verify every downloaded asset against its published digest, vendor the
    selected JS assets, regenerate the ``_info.py`` hash table, and rewrite
    the pyproject pin and Makefile variables.
    """
    # Find the pinned version: the dependency line looks like PREFIX + '<version>"'.
    current_version = None
    with PYPROJECT_PATH.open() as file:
        for line in file:
            if not line.startswith(PREFIX):
                continue
            current_version, _, _ = line.removeprefix(PREFIX).partition('"')

    if not current_version:
        print('yt-dlp-ejs dependency line could not be found')
        return

    # Snapshot the current EJS_* variable values from the Makefile so the old
    # `KEY = value` lines can be string-replaced later.
    makefile_info = makefile_variables(keys_only=True)
    prefixes = tuple(f'{key} = ' for key in makefile_info)
    with MAKEFILE_PATH.open() as file:
        for line in file:
            if not line.startswith(prefixes):
                continue
            key, _, val = line.partition(' = ')
            makefile_info[key] = val.rstrip()

    with request(RELEASE_URL) as resp:
        info = json.load(resp)

    version = info['tag_name']
    if version == current_version:
        print(f'yt-dlp-ejs is up to date! ({version})')
        return

    print(f'Updating yt-dlp-ejs from {current_version} to {version}')
    hashes = []
    wheel_info = {}
    for asset in info['assets']:
        name = asset['name']
        is_wheel = name.startswith('yt_dlp_ejs-') and name.endswith('.whl')
        if not is_wheel and name not in ASSETS:
            continue  # unrelated release asset
        with request(asset['browser_download_url']) as resp:
            data = resp.read()

        # verify digest from github (asset['digest'] is '<algo>:<hexdigest>')
        digest = asset['digest']
        algo, _, expected = digest.partition(':')
        hexdigest = hashlib.new(algo, data).hexdigest()
        assert hexdigest == expected, f'downloaded attest mismatch ({hexdigest!r} != {expected!r})'

        if is_wheel:
            # The wheel only feeds the Makefile variables; it is not vendored.
            wheel_info = makefile_variables(version, name, digest, data)
            continue

        # calculate sha3-512 digest for the generated _info.py HASHES table
        asset_hash = hashlib.sha3_512(data).hexdigest()
        hashes.append(f' {name!r}: {asset_hash!r},')

        if ASSETS[name]:
            # Only assets flagged True in ASSETS are written into the package tree.
            (PACKAGE_PATH / name).write_bytes(data)

    hash_mapping = '\n'.join(hashes)
    for asset_name in ASSETS:
        # Substring check against the rendered mapping suffices: names are quoted reprs.
        assert asset_name in hash_mapping, f'{asset_name} not found in release'

    assert all(wheel_info.get(key) for key in makefile_info), 'wheel info not found in release'

    (PACKAGE_PATH / '_info.py').write_text(TEMPLATE.format(
        version=version,
        hash_mapping=hash_mapping,
    ))

    # Bump the version pin in pyproject.toml in place.
    content = PYPROJECT_PATH.read_text()
    updated = content.replace(PREFIX + current_version, PREFIX + version)
    PYPROJECT_PATH.write_text(updated)

    # Swap each old `KEY = value` Makefile line for the freshly computed value.
    makefile = MAKEFILE_PATH.read_text()
    for key in wheel_info:
        makefile = makefile.replace(f'{key} = {makefile_info[key]}', f'{key} = {wheel_info[key]}')
    MAKEFILE_PATH.write_text(makefile)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run the update when executed directly.
    main()
|
||||
@@ -18,6 +18,7 @@ def build_completion(opt_parser):
|
||||
for opt in group.option_list]
|
||||
opts_file = [opt for opt in opts if opt.metavar == 'FILE']
|
||||
opts_dir = [opt for opt in opts if opt.metavar == 'DIR']
|
||||
opts_path = [opt for opt in opts if opt.metavar == 'PATH']
|
||||
|
||||
fileopts = []
|
||||
for opt in opts_file:
|
||||
@@ -26,6 +27,12 @@ def build_completion(opt_parser):
|
||||
if opt._long_opts:
|
||||
fileopts.extend(opt._long_opts)
|
||||
|
||||
for opt in opts_path:
|
||||
if opt._short_opts:
|
||||
fileopts.extend(opt._short_opts)
|
||||
if opt._long_opts:
|
||||
fileopts.extend(opt._long_opts)
|
||||
|
||||
diropts = []
|
||||
for opt in opts_dir:
|
||||
if opt._short_opts:
|
||||
|
||||
@@ -56,6 +56,7 @@ default = [
|
||||
"requests>=2.32.2,<3",
|
||||
"urllib3>=2.0.2,<3",
|
||||
"websockets>=13.0",
|
||||
"yt-dlp-ejs==0.3.2",
|
||||
]
|
||||
curl-cffi = [
|
||||
"curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.14; implementation_name=='cpython'",
|
||||
@@ -68,7 +69,7 @@ build = [
|
||||
"build",
|
||||
"hatchling>=1.27.0",
|
||||
"pip",
|
||||
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
"setuptools>=71.0.2",
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
@@ -85,7 +86,7 @@ test = [
|
||||
"pytest-rerunfailures~=14.0",
|
||||
]
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0
|
||||
"pyinstaller>=6.17.0", # 6.17.0+ needed for compat with setuptools 81+
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@@ -122,7 +123,12 @@ artifacts = [
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["yt_dlp"]
|
||||
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
|
||||
artifacts = [
|
||||
"/yt_dlp/extractor/lazy_extractors.py",
|
||||
]
|
||||
exclude = [
|
||||
"/yt_dlp/**/*.md",
|
||||
]
|
||||
|
||||
[tool.hatch.build.targets.wheel.shared-data]
|
||||
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
|
||||
|
||||
@@ -12,6 +12,7 @@ # Supported sites
|
||||
- **17live:vod**
|
||||
- **1News**: 1news.co.nz article videos
|
||||
- **1tv**: Первый канал
|
||||
- **1tv:live**: Первый канал (прямой эфир)
|
||||
- **20min**: (**Currently broken**)
|
||||
- **23video**
|
||||
- **247sports**: (**Currently broken**)
|
||||
@@ -49,8 +50,10 @@ # Supported sites
|
||||
- **aenetworks:collection**
|
||||
- **aenetworks:show**
|
||||
- **AeonCo**
|
||||
- **agalega:videos**
|
||||
- **AirTV**
|
||||
- **AitubeKZVideo**
|
||||
- **Alibaba**
|
||||
- **AliExpressLive**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
@@ -93,6 +96,8 @@ # Supported sites
|
||||
- **archive.org**: archive.org video and audio
|
||||
- **ArcPublishing**
|
||||
- **ARD**
|
||||
- **ARDAudiothek**
|
||||
- **ARDAudiothekPlaylist**
|
||||
- **ARDMediathek**
|
||||
- **ARDMediathekCollection**
|
||||
- **Art19**
|
||||
@@ -187,6 +192,7 @@ # Supported sites
|
||||
- **Biography**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **Bitmovin**
|
||||
- **BlackboardCollaborate**
|
||||
- **BlackboardCollaborateLaunch**
|
||||
- **BleacherReport**: (**Currently broken**)
|
||||
@@ -533,7 +539,6 @@ # Supported sites
|
||||
- **google:podcasts:feed**
|
||||
- **GoogleDrive**
|
||||
- **GoogleDrive:Folder**
|
||||
- **GoPlay**: [*goplay*](## "netrc machine")
|
||||
- **GoPro**
|
||||
- **Goshgay**
|
||||
- **GoToStage**
|
||||
@@ -729,7 +734,7 @@ # Supported sites
|
||||
- **loc**: Library of Congress
|
||||
- **Loco**
|
||||
- **loom**
|
||||
- **loom:folder**
|
||||
- **loom:folder**: (**Currently broken**)
|
||||
- **LoveHomePorn**
|
||||
- **LRTRadio**
|
||||
- **LRTStream**
|
||||
@@ -760,7 +765,8 @@ # Supported sites
|
||||
- **massengeschmack.tv**
|
||||
- **Masters**
|
||||
- **MatchTV**
|
||||
- **Mave**
|
||||
- **mave**
|
||||
- **mave:channel**
|
||||
- **MBN**: mbn.co.kr (매일방송)
|
||||
- **MDR**: MDR.DE
|
||||
- **MedalTV**
|
||||
@@ -844,6 +850,7 @@ # Supported sites
|
||||
- **MusicdexArtist**
|
||||
- **MusicdexPlaylist**
|
||||
- **MusicdexSong**
|
||||
- **Mux**
|
||||
- **Mx3**
|
||||
- **Mx3Neo**
|
||||
- **Mx3Volksmusik**
|
||||
@@ -858,6 +865,7 @@ # Supported sites
|
||||
- **n-tv.de**
|
||||
- **N1Info:article**
|
||||
- **N1InfoAsset**
|
||||
- **NascarClassics**
|
||||
- **Nate**
|
||||
- **NateProgram**
|
||||
- **natgeo:video**
|
||||
@@ -891,6 +899,8 @@ # Supported sites
|
||||
- **NerdCubedFeed**
|
||||
- **Nest**
|
||||
- **NestClip**
|
||||
- **NetAppCollection**
|
||||
- **NetAppVideo**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
- **netease:mv**: 网易云音乐 - MV
|
||||
@@ -958,6 +968,7 @@ # Supported sites
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **NovaEmbed**
|
||||
- **NovaPlay**
|
||||
- **NowCanal**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
@@ -1071,6 +1082,7 @@ # Supported sites
|
||||
- **PlanetMarathi**
|
||||
- **Platzi**: [*platzi*](## "netrc machine")
|
||||
- **PlatziCourse**: [*platzi*](## "netrc machine")
|
||||
- **play.tv**: [*goplay*](## "netrc machine") PLAY (formerly goplay.be)
|
||||
- **player.sky.it**
|
||||
- **PlayerFm**
|
||||
- **playeur**
|
||||
@@ -1368,7 +1380,7 @@ # Supported sites
|
||||
- **Spiegel**
|
||||
- **Sport5**
|
||||
- **SportBox**: (**Currently broken**)
|
||||
- **SportDeutschland**
|
||||
- **sporteurope**
|
||||
- **Spreaker**
|
||||
- **SpreakerShow**
|
||||
- **SpringboardPlatform**
|
||||
@@ -1456,6 +1468,8 @@ # Supported sites
|
||||
- **TFO**: (**Currently broken**)
|
||||
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
|
||||
- **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
|
||||
- **TheChosen**
|
||||
- **TheChosenGroup**
|
||||
- **TheGuardianPodcast**
|
||||
- **TheGuardianPodcastPlaylist**
|
||||
- **TheHighWire**
|
||||
@@ -1559,12 +1573,12 @@ # Supported sites
|
||||
- **TwitCastingLive**
|
||||
- **TwitCastingUser**
|
||||
- **twitch:clips**: [*twitch*](## "netrc machine")
|
||||
- **twitch:collection**: [*twitch*](## "netrc machine")
|
||||
- **twitch:stream**: [*twitch*](## "netrc machine")
|
||||
- **twitch:videos**: [*twitch*](## "netrc machine")
|
||||
- **twitch:videos:clips**: [*twitch*](## "netrc machine")
|
||||
- **twitch:videos:collections**: [*twitch*](## "netrc machine")
|
||||
- **twitch:vod**: [*twitch*](## "netrc machine")
|
||||
- **TwitchCollection**: [*twitch*](## "netrc machine")
|
||||
- **TwitchVideos**: [*twitch*](## "netrc machine")
|
||||
- **TwitchVideosClips**: [*twitch*](## "netrc machine")
|
||||
- **TwitchVideosCollections**: [*twitch*](## "netrc machine")
|
||||
- **twitter**: [*twitter*](## "netrc machine")
|
||||
- **twitter:amplify**: [*twitter*](## "netrc machine")
|
||||
- **twitter:broadcast**: [*twitter*](## "netrc machine")
|
||||
@@ -1773,6 +1787,7 @@ # Supported sites
|
||||
- **YapFiles**: (**Currently broken**)
|
||||
- **Yappy**: (**Currently broken**)
|
||||
- **YappyProfile**
|
||||
- **yfanefa**
|
||||
- **YleAreena**
|
||||
- **YouJizz**
|
||||
- **youku**: 优酷
|
||||
|
||||
@@ -52,6 +52,33 @@ def skip_handlers_if(request, handler):
|
||||
pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def handler_flaky(request, handler):
|
||||
"""Mark a certain handler as being flaky.
|
||||
|
||||
This will skip the test if pytest does not get run using `--allow-flaky`
|
||||
|
||||
usage:
|
||||
pytest.mark.handler_flaky('my_handler', os.name != 'nt', reason='reason')
|
||||
"""
|
||||
for marker in request.node.iter_markers(handler_flaky.__name__):
|
||||
if (
|
||||
marker.args[0] == handler.RH_KEY
|
||||
and (not marker.args[1:] or any(marker.args[1:]))
|
||||
and request.config.getoption('disallow_flaky')
|
||||
):
|
||||
reason = marker.kwargs.get('reason')
|
||||
pytest.skip(f'flaky: {reason}' if reason else 'flaky')
|
||||
|
||||
|
||||
def pytest_addoption(parser, pluginmanager):
|
||||
parser.addoption(
|
||||
'--disallow-flaky',
|
||||
action='store_true',
|
||||
help='disallow flaky tests from running.',
|
||||
)
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
config.addinivalue_line(
|
||||
'markers', 'skip_handler(handler): skip test for the given handler',
|
||||
@@ -62,3 +89,6 @@ def pytest_configure(config):
|
||||
config.addinivalue_line(
|
||||
'markers', 'skip_handlers_if(handler): skip test for handlers when the condition is true',
|
||||
)
|
||||
config.addinivalue_line(
|
||||
'markers', 'handler_flaky(handler): mark handler as flaky if condition is true',
|
||||
)
|
||||
|
||||
@@ -13,12 +13,10 @@
|
||||
|
||||
import contextlib
|
||||
import copy
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches, try_rm
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
from yt_dlp.utils import (
|
||||
@@ -337,99 +335,6 @@ def test_format_selection_string_ops(self):
|
||||
ydl = YDL({'format': '[format_id!*=-]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
# FIXME: Rewrite in accordance with the new format sorting options
|
||||
return
|
||||
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
|
||||
def format_info(f_id):
|
||||
info = YoutubeIE._formats[f_id].copy()
|
||||
|
||||
# XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
|
||||
# and 'vcodec', while in tests such information is incomplete since
|
||||
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||
# this fix
|
||||
if 'acodec' in info and 'vcodec' not in info:
|
||||
info['vcodec'] = 'none'
|
||||
elif 'vcodec' in info and 'acodec' not in info:
|
||||
info['acodec'] = 'none'
|
||||
|
||||
info['format_id'] = f_id
|
||||
info['url'] = 'url:' + f_id
|
||||
return info
|
||||
formats_order = [format_info(f_id) for f_id in order]
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo+bestaudio'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '248+172')
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '38')
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo/best,bestaudio'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['137', '141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['248+141'])
|
||||
|
||||
for f1, f2 in itertools.pairwise(formats_order):
|
||||
info_dict = _make_result([f1, f2], extractor='youtube')
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
info_dict = _make_result([f2, f1], extractor='youtube')
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_audio_only_extractor_format_selection(self):
|
||||
# For extractors with incomplete formats (all formats are audio-only or
|
||||
# video-only) best and worst should fallback to corresponding best/worst
|
||||
|
||||
@@ -247,6 +247,7 @@ def ctx(request):
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http
|
||||
class TestHTTPProxy:
|
||||
def test_http_no_auth(self, handler, ctx):
|
||||
@@ -315,6 +316,7 @@ def test_http_with_idn(self, handler, ctx):
|
||||
('Requests', 'https'),
|
||||
('CurlCFFI', 'https'),
|
||||
], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
class TestHTTPConnectProxy:
|
||||
def test_http_connect_no_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPConnectProxyHandler) as server_address:
|
||||
|
||||
60
test/test_jsc/conftest.py
Normal file
60
test/test_jsc/conftest.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import re
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
import yt_dlp.globals
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
_TESTDATA_PATH = pathlib.Path(__file__).parent.parent / 'testdata/sigs'
|
||||
_player_re = re.compile(r'^.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$')
|
||||
_player_id_trans = str.maketrans(dict.fromkeys('/.-', '_'))
|
||||
|
||||
|
||||
@pytest.fixture
def ie() -> InfoExtractor:
    """Provide a Youtube extractor whose player downloads are disk-cached.

    The extractor's ``_load_player`` is monkey-patched to serve player JS from
    ``test/testdata/sigs`` when available, downloading and caching it otherwise.
    """
    runtime_names = yt_dlp.globals.supported_js_runtimes.value
    ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
    extractor = ydl.get_info_extractor('Youtube')

    def _load_player(video_id, player_url, fatal=True):
        match = _player_re.match(player_url)
        # Flatten the player path into a filesystem-safe cache-file id.
        cache_id = match.group('id').translate(_player_id_trans)
        cache_file = _TESTDATA_PATH / f'player-{cache_id}.js'

        if cache_file.exists():
            return cache_file.read_text()

        code = extractor._download_webpage(player_url, video_id, fatal=fatal)
        if code:
            _TESTDATA_PATH.mkdir(exist_ok=True, parents=True)
            cache_file.write_text(code)
            return code

        return None

    extractor._load_player = _load_player
    return extractor
|
||||
|
||||
|
||||
class MockLogger:
    """Test logger that echoes every message to stdout with a level prefix."""

    @staticmethod
    def _emit(level: str, message: str):
        # Single formatting point shared by all log levels.
        print(f'{level}: {message}')

    def trace(self, message: str):
        self._emit('trace', message)

    def debug(self, message: str, *, once=False):
        # `once` is accepted for interface compatibility but not tracked.
        self._emit('debug', message)

    def info(self, message: str):
        self._emit('info', message)

    def warning(self, message: str, *, once=False):
        self._emit('warning', message)

    def error(self, message: str):
        self._emit('error', message)
|
||||
|
||||
|
||||
@pytest.fixture
def logger():
    """Provide a fresh MockLogger instance for each test."""
    return MockLogger()
|
||||
128
test/test_jsc/test_ejs_integration.py
Normal file
128
test/test_jsc/test_ejs_integration.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import enum
|
||||
import importlib.util
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeRequest,
|
||||
JsChallengeType,
|
||||
JsChallengeProviderResponse,
|
||||
JsChallengeResponse,
|
||||
NChallengeInput,
|
||||
NChallengeOutput,
|
||||
SigChallengeInput,
|
||||
SigChallengeOutput,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.quickjs import QuickJSJCP
|
||||
|
||||
|
||||
_has_ejs = bool(importlib.util.find_spec('yt_dlp_ejs'))
|
||||
pytestmark = pytest.mark.skipif(not _has_ejs, reason='yt-dlp-ejs not available')
|
||||
|
||||
|
||||
class Variant(enum.Enum):
    """YouTube player variants; each value is the player JS path suffix.

    The value is appended to ``https://www.youtube.com/s/player/<player_id>/``
    (see ``Challenge.url``).
    """
    main = 'player_ias.vflset/en_US/base.js'
    tcc = 'player_ias_tcc.vflset/en_US/base.js'
    tce = 'player_ias_tce.vflset/en_US/base.js'
    es5 = 'player_es5.vflset/en_US/base.js'
    es6 = 'player_es6.vflset/en_US/base.js'
    tv = 'tv-player-ias.vflset/tv-player-ias.js'
    tv_es6 = 'tv-player-es6.vflset/tv-player-es6.js'
    phone = 'player-plasma-ias-phone-en_US.vflset/base.js'
    tablet = 'player-plasma-ias-tablet-en_US.vflset/base.js'
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Challenge:
    """A single player challenge fixture.

    Attributes:
        player: player id segment of the player URL
        variant: which player build the challenge targets
        type: n- or sig-challenge
        values: mapping of challenge input -> expected solved output
    """

    player: str
    variant: Variant
    type: JsChallengeType
    values: dict[str, str] = dataclasses.field(default_factory=dict)

    def url(self, /):
        """Return the full youtube.com URL of this player variant."""
        base = 'https://www.youtube.com/s/player'
        return f'{base}/{self.player}/{self.variant.value}'
|
||||
|
||||
|
||||
# Known-good challenge fixtures: each entry pins a player id + variant and the
# expected input -> output pairs for its n/sig transformation.
# NOTE(review): values appear to be captured from real players — re-capture
# when the referenced players rotate out of circulation.
CHALLENGES: list[Challenge] = [
    Challenge('3d3ba064', Variant.tce, JsChallengeType.N, {
        'ZdZIqFPQK-Ty8wId': 'qmtUsIz04xxiNW',
        '4GMrWHyKI5cEvhDO': 'N9gmEX7YhKTSmw',
    }),
    Challenge('3d3ba064', Variant.tce, JsChallengeType.SIG, {
        'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
            'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O',
    }),
    Challenge('5ec65609', Variant.tce, JsChallengeType.N, {
        '0eRGgQWJGfT5rFHFj': '4SvMpDQH-vBJCw',
    }),
    Challenge('5ec65609', Variant.tce, JsChallengeType.SIG, {
        'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I':
            'AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ==',
    }),
    Challenge('6742b2b9', Variant.tce, JsChallengeType.N, {
        '_HPB-7GFg1VTkn9u': 'qUAsPryAO_ByYg',
        'K1t_fcB6phzuq2SF': 'Y7PcOt3VE62mog',
    }),
    Challenge('6742b2b9', Variant.tce, JsChallengeType.SIG, {
        'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA':
            'AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM',
    }),
    Challenge('2b83d2e0', Variant.main, JsChallengeType.N, {
        '0eRGgQWJGfT5rFHFj': 'euHbygrCMLksxd',
    }),
    Challenge('2b83d2e0', Variant.main, JsChallengeType.SIG, {
        'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJA':
            '-MGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKnMznQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJ',
    }),
    Challenge('638ec5c6', Variant.main, JsChallengeType.N, {
        'ZdZIqFPQK-Ty8wId': '1qov8-KM-yH',
    }),
    Challenge('638ec5c6', Variant.main, JsChallengeType.SIG, {
        'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
            'MhudCuAuP-6fByOk1_GNXN7gNHHShjyXS2VOgsEItAJz0tipeav0OmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
    }),
]
|
||||
|
||||
# Build provider-level request/response fixture pairs from CHALLENGES so the
# integration tests can compare bulk_solve() output against known-good data.
requests: list[JsChallengeRequest] = []
responses: list[JsChallengeProviderResponse] = []
for test in CHALLENGES:
    # Select the input/output dataclass pair matching the challenge type.
    input_type, output_type = {
        JsChallengeType.N: (NChallengeInput, NChallengeOutput),
        JsChallengeType.SIG: (SigChallengeInput, SigChallengeOutput),
    }[test.type]

    request = JsChallengeRequest(test.type, input_type(test.url(), list(test.values.keys())), test.player)
    requests.append(request)
    responses.append(JsChallengeProviderResponse(request, JsChallengeResponse(test.type, output_type(test.values))))
|
||||
|
||||
|
||||
@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP, QuickJSJCP])
def jcp(request, ie, logger):
    """Instantiate each builtin JS challenge provider in turn.

    Skips the test when the provider's runtime is not installed; enables
    dev mode on the returned provider.
    """
    provider = request.param(ie, logger, None)
    if not provider.is_available():
        pytest.skip(f'{provider.PROVIDER_NAME} is not available')
    provider.is_dev = True
    return provider
|
||||
|
||||
|
||||
@pytest.mark.download
def test_bulk_requests(jcp):
    """Every prepared request must resolve to its expected response, in order."""
    solved = list(jcp.bulk_solve(requests))
    assert solved == responses
|
||||
|
||||
|
||||
@pytest.mark.download
def test_using_cached_player(jcp):
    """Solving via the preprocessed (cached) player must match the initial run."""
    subset = requests[:3]
    player_code = jcp._get_player(subset[0].video_id, subset[0].input.player_url)

    first_run = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player_code, False, subset)))
    # The first run also emits the preprocessed player; strip it before comparing.
    cached_player = first_run.pop('preprocessed_player')
    second_run = json.loads(jcp._run_js_runtime(jcp._construct_stdin(cached_player, True, subset)))

    assert first_run == second_run
|
||||
194
test/test_jsc/test_provider.py
Normal file
194
test/test_jsc/test_provider.py
Normal file
@@ -0,0 +1,194 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeProvider,
|
||||
JsChallengeRequest,
|
||||
JsChallengeProviderResponse,
|
||||
JsChallengeProviderRejectedRequest,
|
||||
JsChallengeType,
|
||||
JsChallengeResponse,
|
||||
NChallengeOutput,
|
||||
NChallengeInput,
|
||||
JsChallengeProviderError,
|
||||
register_provider,
|
||||
register_preference,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
|
||||
from yt_dlp.utils import ExtractorError
|
||||
from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
|
||||
|
||||
|
||||
class ExampleJCP(JsChallengeProvider):
    """Minimal provider that answers every N challenge with a fixed placeholder."""

    PROVIDER_NAME = 'example-provider'
    PROVIDER_VERSION = '0.0.1'
    BUG_REPORT_LOCATION = 'https://example.com/issues'

    _SUPPORTED_TYPES = [JsChallengeType.N]

    def is_available(self) -> bool:
        return True

    def _real_bulk_solve(self, requests):
        # Map every challenge in each request to the same dummy solution.
        for request in requests:
            solved = {challenge: 'example-solution' for challenge in request.input.challenges}
            yield JsChallengeProviderResponse(
                request=request,
                response=JsChallengeResponse(type=request.type, output=NChallengeOutput(results=solved)),
            )
|
||||
|
||||
|
||||
PLAYER_URL = 'https://example.com/player.js'
|
||||
|
||||
|
||||
class TestJsChallengeProvider:
|
||||
# note: some test covered in TestPoTokenProvider which shares the same base class
|
||||
def test_base_type(self):
|
||||
assert issubclass(JsChallengeProvider, IEContentProvider)
|
||||
|
||||
def test_create_provider_missing_bulk_solve_method(self, ie, logger):
|
||||
class MissingMethodsJCP(JsChallengeProvider):
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
with pytest.raises(TypeError, match='bulk_solve'):
|
||||
MissingMethodsJCP(ie=ie, logger=logger, settings={})
|
||||
|
||||
def test_create_provider_missing_available_method(self, ie, logger):
|
||||
class MissingMethodsJCP(JsChallengeProvider):
|
||||
def _real_bulk_solve(self, requests):
|
||||
raise JsChallengeProviderRejectedRequest('Not implemented')
|
||||
|
||||
with pytest.raises(TypeError, match='is_available'):
|
||||
MissingMethodsJCP(ie=ie, logger=logger, settings={})
|
||||
|
||||
def test_barebones_provider(self, ie, logger):
|
||||
class BarebonesProviderJCP(JsChallengeProvider):
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
def _real_bulk_solve(self, requests):
|
||||
raise JsChallengeProviderRejectedRequest('Not implemented')
|
||||
|
||||
provider = BarebonesProviderJCP(ie=ie, logger=logger, settings={})
|
||||
assert provider.PROVIDER_NAME == 'BarebonesProvider'
|
||||
assert provider.PROVIDER_KEY == 'BarebonesProvider'
|
||||
assert provider.PROVIDER_VERSION == '0.0.0'
|
||||
assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
|
||||
|
||||
def test_example_provider_success(self, ie, logger):
|
||||
provider = ExampleJCP(ie=ie, logger=logger, settings={})
|
||||
|
||||
request = JsChallengeRequest(
|
||||
type=JsChallengeType.N,
|
||||
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
|
||||
|
||||
request_two = JsChallengeRequest(
|
||||
type=JsChallengeType.N,
|
||||
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge-2']))
|
||||
|
||||
responses = list(provider.bulk_solve([request, request_two]))
|
||||
assert len(responses) == 2
|
||||
assert all(isinstance(r, JsChallengeProviderResponse) for r in responses)
|
||||
assert responses == [
|
||||
JsChallengeProviderResponse(
|
||||
request=request,
|
||||
response=JsChallengeResponse(
|
||||
type=JsChallengeType.N,
|
||||
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
|
||||
),
|
||||
),
|
||||
JsChallengeProviderResponse(
|
||||
request=request_two,
|
||||
response=JsChallengeResponse(
|
||||
type=JsChallengeType.N,
|
||||
output=NChallengeOutput(results={'example-challenge-2': 'example-solution'}),
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
def test_provider_unsupported_challenge_type(self, ie, logger):
|
||||
provider = ExampleJCP(ie=ie, logger=logger, settings={})
|
||||
request_supported = JsChallengeRequest(
|
||||
type=JsChallengeType.N,
|
||||
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
|
||||
request_unsupported = JsChallengeRequest(
|
||||
type=JsChallengeType.SIG,
|
||||
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
|
||||
responses = list(provider.bulk_solve([request_supported, request_unsupported, request_supported]))
|
||||
assert len(responses) == 3
|
||||
# Requests are validated first before continuing to _real_bulk_solve
|
||||
assert isinstance(responses[0], JsChallengeProviderResponse)
|
||||
assert isinstance(responses[0].error, JsChallengeProviderRejectedRequest)
|
||||
assert responses[0].request is request_unsupported
|
||||
assert str(responses[0].error) == 'JS Challenge type "JsChallengeType.SIG" is not supported by example-provider'
|
||||
|
||||
assert responses[1:] == [
|
||||
JsChallengeProviderResponse(
|
||||
request=request_supported,
|
||||
response=JsChallengeResponse(
|
||||
type=JsChallengeType.N,
|
||||
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
|
||||
),
|
||||
),
|
||||
JsChallengeProviderResponse(
|
||||
request=request_supported,
|
||||
response=JsChallengeResponse(
|
||||
type=JsChallengeType.N,
|
||||
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
def test_provider_get_player(self, ie, logger):
|
||||
ie._load_player = lambda video_id, player_url, fatal: (video_id, player_url, fatal)
|
||||
provider = ExampleJCP(ie=ie, logger=logger, settings={})
|
||||
assert provider._get_player('video123', PLAYER_URL) == ('video123', PLAYER_URL, True)
|
||||
|
||||
def test_provider_get_player_error(self, ie, logger):
|
||||
def raise_error(video_id, player_url, fatal):
|
||||
raise ExtractorError('Failed to load player')
|
||||
|
||||
ie._load_player = raise_error
|
||||
provider = ExampleJCP(ie=ie, logger=logger, settings={})
|
||||
with pytest.raises(JsChallengeProviderError, match='Failed to load player for JS challenge'):
|
||||
provider._get_player('video123', PLAYER_URL)
|
||||
|
||||
def test_require_class_end_with_suffix(self, ie, logger):
|
||||
class InvalidSuffix(JsChallengeProvider):
|
||||
PROVIDER_NAME = 'invalid-suffix'
|
||||
|
||||
def _real_bulk_solve(self, requests):
|
||||
raise JsChallengeProviderRejectedRequest('Not implemented')
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
provider = InvalidSuffix(ie=ie, logger=logger, settings={})
|
||||
|
||||
with pytest.raises(AssertionError):
|
||||
provider.PROVIDER_KEY # noqa: B018
|
||||
|
||||
|
||||
def test_register_provider(ie):
|
||||
|
||||
@register_provider
|
||||
class UnavailableProviderJCP(JsChallengeProvider):
|
||||
def is_available(self) -> bool:
|
||||
return False
|
||||
|
||||
def _real_bulk_solve(self, requests):
|
||||
raise JsChallengeProviderRejectedRequest('Not implemented')
|
||||
|
||||
assert _jsc_providers.value.get('UnavailableProvider') == UnavailableProviderJCP
|
||||
_jsc_providers.value.pop('UnavailableProvider')
|
||||
|
||||
|
||||
def test_register_preference(ie):
|
||||
before = len(_jsc_preferences.value)
|
||||
|
||||
@register_preference(ExampleJCP)
|
||||
def unavailable_preference(*args, **kwargs):
|
||||
return 1
|
||||
|
||||
assert len(_jsc_preferences.value) == before + 1
|
||||
@@ -3,6 +3,7 @@
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -311,6 +312,7 @@ def setup_class(cls):
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', os.name == 'nt', reason='segfaults')
|
||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
|
||||
def test_verify_cert(self, handler):
|
||||
@@ -614,8 +616,11 @@ def test_source_address(self, handler):
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||
def test_gzip_trailing_garbage(self, handler):
|
||||
with handler() as rh:
|
||||
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
||||
res = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage'))
|
||||
data = res.read().decode()
|
||||
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
|
||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||
@@ -627,6 +632,8 @@ def test_brotli(self, handler):
|
||||
headers={'ytdl-encoding': 'br'}))
|
||||
assert res.headers.get('Content-Encoding') == 'br'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
def test_deflate(self, handler):
|
||||
with handler() as rh:
|
||||
@@ -636,6 +643,8 @@ def test_deflate(self, handler):
|
||||
headers={'ytdl-encoding': 'deflate'}))
|
||||
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
def test_gzip(self, handler):
|
||||
with handler() as rh:
|
||||
@@ -645,6 +654,8 @@ def test_gzip(self, handler):
|
||||
headers={'ytdl-encoding': 'gzip'}))
|
||||
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
def test_multiple_encodings(self, handler):
|
||||
with handler() as rh:
|
||||
@@ -655,6 +666,8 @@ def test_multiple_encodings(self, handler):
|
||||
headers={'ytdl-encoding': pair}))
|
||||
assert res.headers.get('Content-Encoding') == pair
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||
def test_unsupported_encoding(self, handler):
|
||||
@@ -665,6 +678,8 @@ def test_unsupported_encoding(self, handler):
|
||||
headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
|
||||
assert res.headers.get('Content-Encoding') == 'unsupported'
|
||||
assert res.read() == b'raw'
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
def test_read(self, handler):
|
||||
with handler() as rh:
|
||||
@@ -672,9 +687,13 @@ def test_read(self, handler):
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
|
||||
assert res.readable()
|
||||
assert res.read(1) == b'H'
|
||||
# Ensure we don't close the adaptor yet
|
||||
assert not res.closed
|
||||
assert res.read(3) == b'ost'
|
||||
assert res.read().decode().endswith('\n\n')
|
||||
assert res.read() == b''
|
||||
# Should auto-close and mark the response adaptor as closed
|
||||
assert res.closed
|
||||
|
||||
def test_request_disable_proxy(self, handler):
|
||||
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||
@@ -736,8 +755,20 @@ def test_partial_read_then_full_read(self, handler):
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||
|
||||
def test_partial_read_greater_than_response_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
for encoding in ('', 'gzip', 'deflate'):
|
||||
res = validate_and_send(rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': encoding}))
|
||||
assert res.headers.get('Content-Encoding') == encoding
|
||||
assert res.read(512) == b'<html><video src="/vid.mp4" /></html>'
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b''
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
class TestClientCertificate:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
@@ -875,11 +906,53 @@ def test_file_urls(self, handler):
|
||||
|
||||
with handler(enable_file_urls=True) as rh:
|
||||
res = validate_and_send(rh, req)
|
||||
assert res.read() == b'foobar'
|
||||
res.close()
|
||||
assert res.read(1) == b'f'
|
||||
assert not res.fp.closed
|
||||
assert res.read() == b'oobar'
|
||||
# Should automatically close the underlying file object
|
||||
assert res.fp.closed
|
||||
|
||||
os.unlink(tf.name)
|
||||
|
||||
def test_data_uri_auto_close(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||
assert res.read() == b'hello world'
|
||||
# Should automatically close the underlying file object
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
def test_http_response_auto_close(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
|
||||
assert res.read() == b'<html></html>'
|
||||
# Should automatically close the underlying file object in the HTTP Response
|
||||
assert isinstance(res.fp, http.client.HTTPResponse)
|
||||
assert res.fp.fp is None
|
||||
assert res.closed
|
||||
|
||||
def test_data_uri_partial_read_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||
assert res.read(6) == b'hello '
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'world'
|
||||
# Should automatically close the underlying file object
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||
assert res.read(512) == b'hello world'
|
||||
# Response and its underlying file object should already be closed now
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b''
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
def test_http_error_returns_content(self, handler):
|
||||
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
||||
def get_response():
|
||||
@@ -1012,8 +1085,17 @@ def mock_close(*args, **kwargs):
|
||||
rh.close()
|
||||
assert called
|
||||
|
||||
def test_http_response_auto_close(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
|
||||
assert res.read() == b'<html></html>'
|
||||
# Should automatically close the underlying file object in the HTTP Response
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', os.name == 'nt', reason='segfaults')
|
||||
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
|
||||
|
||||
@pytest.mark.parametrize('params,extensions', [
|
||||
@@ -1177,6 +1259,14 @@ def close(self):
|
||||
assert res4.closed
|
||||
assert res4._buffer == b''
|
||||
|
||||
def test_http_response_auto_close(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
|
||||
assert res.read() == b'<html></html>'
|
||||
# Should automatically close the underlying file object in the HTTP Response
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
|
||||
def run_validation(handler, error, req, **handler_kwargs):
|
||||
with handler(**handler_kwargs) as rh:
|
||||
@@ -2032,6 +2122,30 @@ def test_compat(self):
|
||||
assert res.info() is res.headers
|
||||
assert res.getheader('test') == res.get_header('test')
|
||||
|
||||
def test_auto_close(self):
|
||||
# Should mark the response as closed if the underlying file is closed
|
||||
class AutoCloseBytesIO(io.BytesIO):
|
||||
def read(self, size=-1, /):
|
||||
data = super().read(size)
|
||||
self.close()
|
||||
return data
|
||||
|
||||
fp = AutoCloseBytesIO(b'test')
|
||||
res = Response(fp, url='test://', headers={}, status=200)
|
||||
assert not res.closed
|
||||
res.read()
|
||||
assert res.closed
|
||||
|
||||
def test_close(self):
|
||||
# Should not call close() on the underlying file when already closed
|
||||
fp = MagicMock()
|
||||
fp.closed = False
|
||||
res = Response(fp, url='test://', headers={}, status=200)
|
||||
res.close()
|
||||
fp.closed = True
|
||||
res.close()
|
||||
assert fp.close.call_count == 1
|
||||
|
||||
|
||||
class TestImpersonateTarget:
|
||||
@pytest.mark.parametrize('target_str,expected', [
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, configuration_arg
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
@@ -627,3 +627,13 @@ def test_logger_log_level(logger):
|
||||
assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG
|
||||
assert logger.LogLevel(10) == logger.LogLevel.DEBUG
|
||||
assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO
|
||||
|
||||
|
||||
def test_configuration_arg():
|
||||
config = {'abc': ['123D'], 'xyz': ['456a', '789B']}
|
||||
|
||||
assert configuration_arg(config, 'abc') == ['123d']
|
||||
assert configuration_arg(config, 'abc', default=['default']) == ['123d']
|
||||
assert configuration_arg(config, 'ABC', default=['default']) == ['default']
|
||||
assert configuration_arg(config, 'abc', casesense=True) == ['123D']
|
||||
assert configuration_arg(config, 'xyz', casesense=False) == ['456a', '789b']
|
||||
|
||||
@@ -295,6 +295,7 @@ def ctx(request):
|
||||
('Websockets', 'ws'),
|
||||
('CurlCFFI', 'http'),
|
||||
], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
class TestSocks4Proxy:
|
||||
def test_socks4_no_auth(self, handler, ctx):
|
||||
with handler() as rh:
|
||||
@@ -370,6 +371,7 @@ def test_timeout(self, handler, ctx):
|
||||
('Websockets', 'ws'),
|
||||
('CurlCFFI', 'http'),
|
||||
], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
class TestSocks5Proxy:
|
||||
|
||||
def test_socks5_no_auth(self, handler, ctx):
|
||||
|
||||
@@ -1403,6 +1403,9 @@ def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
|
||||
self.assertEqual(version_tuple('invalid', lenient=True), (-1,))
|
||||
self.assertEqual(version_tuple('1.2.3', lenient=True), (1, 2, 3))
|
||||
self.assertEqual(version_tuple('12.34-something', lenient=True), (12, 34, -1))
|
||||
|
||||
def test_detect_exe_version(self):
|
||||
self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
|
||||
|
||||
@@ -38,6 +38,13 @@
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
pytestmark = pytest.mark.handler_flaky(
|
||||
'Websockets',
|
||||
os.name == 'nt' or sys.implementation.name == 'pypy',
|
||||
reason='segfaults',
|
||||
)
|
||||
|
||||
|
||||
def websocket_handler(websocket):
|
||||
for message in websocket:
|
||||
if isinstance(message, bytes):
|
||||
|
||||
@@ -1,504 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import contextlib
|
||||
import re
|
||||
import string
|
||||
import urllib.request
|
||||
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.jsinterp import JSInterpreter
|
||||
|
||||
_SIG_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
86,
|
||||
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
85,
|
||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
90,
|
||||
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
84,
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js',
|
||||
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||||
'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
|
||||
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||||
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
|
||||
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||||
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
|
||||
'AAJAJfQdSswRAIgNSN0GDUcHnCIXkKcF61yLBgDHiX1sUhOJdY4_GxunRYCIDeYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9bZ',
|
||||
'AJfQdSswRAIgNSN0GDUcHnCIXkKcF61ZLBgDHiX1sUhOJdY4_GxunRYCIDyYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9be',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
(
|
||||
'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
|
||||
'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
|
||||
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
|
||||
'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
|
||||
'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
|
||||
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
|
||||
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
|
||||
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
|
||||
'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
|
||||
'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
|
||||
'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
|
||||
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
|
||||
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
|
||||
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
|
||||
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
|
||||
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
|
||||
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
||||
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
|
||||
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
||||
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
|
||||
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
|
||||
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
|
||||
'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
|
||||
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
|
||||
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
|
||||
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
||||
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||
'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
|
||||
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
|
||||
'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
|
||||
'0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
|
||||
'0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
|
||||
'6l5CTNx4AzIqH4MXM', 'NupToduxHBew1g',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@is_download_test
class TestPlayerInfo(unittest.TestCase):
    def test_youtube_extract_player_info(self):
        # Each case pairs a player JS URL with the player ID that
        # YoutubeIE._extract_player_info() is expected to parse from it.
        cases = (
            ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
            ('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
            # obsolete
            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
            ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
            ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
        )
        for player_url, expected_id in cases:
            self.assertEqual(YoutubeIE._extract_player_info(player_url), expected_id)
|
||||
|
||||
|
||||
@is_download_test
class TestSignature(unittest.TestCase):
    def setUp(self):
        """Ensure the testdata/sigs cache directory exists next to this test file."""
        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
        if not os.path.exists(self.TESTDATA_DIR):
            os.mkdir(self.TESTDATA_DIR)

    def tearDown(self):
        """Remove cached player files so each test run starts clean."""
        with contextlib.suppress(OSError):
            for f in os.listdir(self.TESTDATA_DIR):
                # os.listdir() yields bare filenames, so the path must be
                # joined with TESTDATA_DIR -- previously os.remove(f) targeted
                # the CWD and silently failed under the OSError suppression.
                with contextlib.suppress(OSError):
                    os.remove(os.path.join(self.TESTDATA_DIR, f))
|
||||
|
||||
|
||||
def t_factory(name, sig_func, url_pattern):
    """Return a registrar that adds one download-and-check test per player URL.

    *sig_func* is called as sig_func(jscode, sig_input, url) and its result
    is compared against the expected signature; generated methods are
    attached to TestSignature.
    """
    def make_tfunc(url, sig_input, expected_sig):
        match = url_pattern.match(url)
        assert match, f'{url!r} should follow URL format'
        # Derive a valid Python identifier from the player ID
        player_id = match.group('id') or match.group('compat_id')
        test_id = re.sub(r'[/.-]', '_', player_id)

        def test_func(self):
            cached = os.path.join(self.TESTDATA_DIR, f'player-{test_id}.js')
            # Download the player JS only once; reuse the on-disk cache afterwards
            if not os.path.exists(cached):
                urllib.request.urlretrieve(url, cached)
            with open(cached, encoding='utf-8') as handle:
                jscode = handle.read()
            self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)

        test_func.__name__ = f'test_{name}_js_{test_id}'
        setattr(TestSignature, test_func.__name__, test_func)

    return make_tfunc
|
||||
|
||||
|
||||
def signature(jscode, sig_input, player_url):
    """Run the player's signature-decryption function over *sig_input*.

    An int input is expanded to the first N printable characters, which
    keeps the test fixtures short while exercising realistic lengths.
    """
    decrypt = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
    if isinstance(sig_input, int):
        src_sig = str(string.printable[:sig_input])
    else:
        src_sig = sig_input
    return decrypt(src_sig)
|
||||
|
||||
|
||||
def n_sig(jscode, sig_input, player_url):
    """Resolve the player's "n" challenge function and apply it to *sig_input*."""
    extractor = YoutubeIE(FakeYDL())
    funcname = extractor._extract_n_function_name(jscode, player_url=player_url)
    interpreter = JSInterpreter(jscode)
    # The raw function code needs extractor-side fixups before it can be
    # evaluated by the JS interpreter
    fixed_code = extractor._fixup_n_function_code(
        *interpreter.extract_function_code(funcname), jscode, player_url)
    challenge = interpreter.extract_function_from_code(*fixed_code)
    return challenge([sig_input])
|
||||
|
||||
|
||||
# Dynamically generate one unittest method per entry in _SIG_TESTS.
# The pattern captures the player ID from either the modern /player/ URL
# layout or the obsolete html5player filename scheme (compat_id group).
make_sig_test = t_factory(
    'signature', signature,
    re.compile(r'''(?x)
        .+(?:
            /player/(?P<id>[a-zA-Z0-9_/.-]+)|
            /html5player-(?:en_US-)?(?P<compat_id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?
        )\.js$'''))
for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

# "n" challenge tests exist only for modern /player/ URLs
make_nsig_test = t_factory(
    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
for test_spec in _NSIG_TESTS:
    make_nsig_test(*test_spec)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run all (dynamically generated) signature tests when invoked directly
    unittest.main()
|
||||
@@ -42,6 +42,8 @@
|
||||
plugin_pps,
|
||||
all_plugins_loaded,
|
||||
plugin_dirs,
|
||||
supported_js_runtimes,
|
||||
supported_remote_components,
|
||||
)
|
||||
from .minicurses import format_text
|
||||
from .networking import HEADRequest, Request, RequestDirector
|
||||
@@ -533,6 +535,18 @@ class YoutubeDL:
|
||||
See "EXTRACTOR ARGUMENTS" for details.
|
||||
Argument values must always be a list of string(s).
|
||||
E.g. {'youtube': {'skip': ['dash', 'hls']}}
|
||||
js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable
|
||||
and a dictionary of additional configuration for the runtime.
|
||||
Currently supported runtimes are 'deno', 'node', 'bun', and 'quickjs'.
|
||||
If None, the default runtime of "deno" will be enabled.
|
||||
The runtime configuration dictionary can have the following keys:
|
||||
- path: Path to the executable (optional)
|
||||
E.g. {'deno': {'path': '/path/to/deno'}
|
||||
remote_components: A list of remote components that are allowed to be fetched when required.
|
||||
Supported components:
|
||||
- ejs:npm (external JavaScript components from npm)
|
||||
- ejs:github (external JavaScript components from yt-dlp-ejs GitHub)
|
||||
By default, no remote components are allowed to be fetched.
|
||||
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
|
||||
|
||||
The following options are deprecated and may be removed in the future:
|
||||
@@ -717,6 +731,13 @@ def process_color_policy(stream):
|
||||
else:
|
||||
raise
|
||||
|
||||
# Note: this must be after plugins are loaded
|
||||
self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
|
||||
self._clean_js_runtimes(self.params['js_runtimes'])
|
||||
|
||||
self.params['remote_components'] = set(self.params.get('remote_components', ()))
|
||||
self._clean_remote_components(self.params['remote_components'])
|
||||
|
||||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||
@@ -829,6 +850,36 @@ def preload_download_archive(fn):
|
||||
|
||||
self.archive = preload_download_archive(self.params.get('download_archive'))
|
||||
|
||||
def _clean_js_runtimes(self, runtimes):
    """Validate the js_runtimes param and drop unsupported entries in place.

    *runtimes* must be a dict mapping runtime names (str) to either None
    or a configuration dict; anything else raises ValueError. Entries
    whose name is not registered in supported_js_runtimes are removed
    after a single warning listing them.
    """
    if not (
        isinstance(runtimes, dict)
        and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items())
    ):
        raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')

    # Mutate the caller's dict so downstream code can assume every
    # remaining key has a registered runtime class
    if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys():
        self.report_warning(
            f'Ignoring unsupported JavaScript runtime(s): {", ".join(unsupported_runtimes)}.'
            f' Supported runtimes: {", ".join(supported_js_runtimes.value.keys())}.')
        for rt in unsupported_runtimes:
            runtimes.pop(rt)
|
||||
|
||||
def _clean_remote_components(self, remote_components: set):
    """Remove unregistered component names from *remote_components* in place,
    emitting a single warning that lists what was dropped."""
    unsupported = set(remote_components) - set(supported_remote_components.value)
    if unsupported:
        self.report_warning(
            f'Ignoring unsupported remote component(s): {", ".join(unsupported)}.'
            f' Supported remote components: {", ".join(supported_remote_components.value)}.')
        remote_components.difference_update(unsupported)
|
||||
|
||||
@functools.cached_property
|
||||
def _js_runtimes(self):
|
||||
runtimes = {}
|
||||
for name, config in self.params.get('js_runtimes', {}).items():
|
||||
runtime_cls = supported_js_runtimes.value.get(name)
|
||||
runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None
|
||||
return runtimes
|
||||
|
||||
def warn_if_short_id(self, argv):
|
||||
# short YouTube ID starting with dash?
|
||||
idxs = [
|
||||
@@ -4064,6 +4115,18 @@ def get_encoding(stream):
|
||||
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
||||
})) or 'none'))
|
||||
|
||||
if not self.params.get('js_runtimes'):
|
||||
write_debug('JS runtimes: none (disabled)')
|
||||
else:
|
||||
write_debug('JS runtimes: %s' % (', '.join(sorted(
|
||||
f'{name} (unknown)' if runtime is None
|
||||
else join_nonempty(
|
||||
runtime.info.name,
|
||||
runtime.info.version + (' (unsupported)' if runtime.info.supported is False else ''),
|
||||
)
|
||||
for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
|
||||
)) or 'none'))
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
|
||||
|
||||
@@ -61,8 +61,15 @@
|
||||
shell_quote,
|
||||
variadic,
|
||||
write_string,
|
||||
|
||||
)
|
||||
from .utils._utils import _UnsafeExtensionError
|
||||
from .utils._jsruntime import (
|
||||
BunJsRuntime as _BunJsRuntime,
|
||||
DenoJsRuntime as _DenoJsRuntime,
|
||||
NodeJsRuntime as _NodeJsRuntime,
|
||||
QuickJsRuntime as _QuickJsRuntime,
|
||||
)
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@@ -773,6 +780,10 @@ def parse_options(argv=None):
|
||||
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
|
||||
else None)
|
||||
|
||||
js_runtimes = {
|
||||
runtime.lower(): {'path': path} for runtime, path in (
|
||||
[*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
|
||||
|
||||
return ParsedOptions(parser, opts, urls, {
|
||||
'usenetrc': opts.usenetrc,
|
||||
'netrc_location': opts.netrc_location,
|
||||
@@ -940,6 +951,8 @@ def parse_options(argv=None):
|
||||
'geo_bypass_country': opts.geo_bypass_country,
|
||||
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
||||
'useid': opts.useid or None,
|
||||
'js_runtimes': js_runtimes,
|
||||
'remote_components': opts.remote_components,
|
||||
'warn_when_outdated': opts.update_self is None,
|
||||
'_warnings': warnings,
|
||||
'_deprecation_warnings': deprecation_warnings,
|
||||
@@ -1081,6 +1094,16 @@ def main(argv=None):
|
||||
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
|
||||
# Register JS runtimes and remote components
|
||||
from .globals import supported_js_runtimes, supported_remote_components
|
||||
supported_js_runtimes.value['deno'] = _DenoJsRuntime
|
||||
supported_js_runtimes.value['node'] = _NodeJsRuntime
|
||||
supported_js_runtimes.value['bun'] = _BunJsRuntime
|
||||
supported_js_runtimes.value['quickjs'] = _QuickJsRuntime
|
||||
|
||||
supported_remote_components.value.append('ejs:github')
|
||||
supported_remote_components.value.append('ejs:npm')
|
||||
|
||||
__all__ = [
|
||||
'YoutubeDL',
|
||||
'gen_extractors',
|
||||
|
||||
@@ -34,3 +34,4 @@ def get_hidden_imports():
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
|
||||
|
||||
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
|
||||
datas += collect_data_files('yt_dlp_ejs', includes=['**/*.js'])
|
||||
|
||||
@@ -125,7 +125,7 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(),
|
||||
|
||||
|
||||
def _extract_firefox_cookies(profile, container, logger):
|
||||
MAX_SUPPORTED_DB_SCHEMA_VERSION = 16
|
||||
MAX_SUPPORTED_DB_SCHEMA_VERSION = 17
|
||||
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not sqlite3:
|
||||
@@ -166,6 +166,8 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
db_schema_version = cursor.execute('PRAGMA user_version;').fetchone()[0]
|
||||
if db_schema_version > MAX_SUPPORTED_DB_SCHEMA_VERSION:
|
||||
logger.warning(f'Possibly unsupported firefox cookies database version: {db_schema_version}')
|
||||
else:
|
||||
logger.debug(f'Firefox cookies database version: {db_schema_version}')
|
||||
if isinstance(container_id, int):
|
||||
logger.debug(
|
||||
f'Only loading cookies from firefox container "{container}", ID {container_id}')
|
||||
@@ -210,9 +212,16 @@ def _firefox_browser_dirs():
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, (
|
||||
# New installations of FF147+ respect the XDG base directory specification
|
||||
# Ref: https://bugzilla.mozilla.org/show_bug.cgi?id=259356
|
||||
os.path.join(_config_home(), 'mozilla/firefox'),
|
||||
# Existing FF version<=146 installations
|
||||
'~/.mozilla/firefox',
|
||||
'~/snap/firefox/common/.mozilla/firefox',
|
||||
# Flatpak XDG: https://docs.flatpak.org/en/latest/conventions.html#xdg-base-directories
|
||||
'~/.var/app/org.mozilla.firefox/config/mozilla/firefox',
|
||||
'~/.var/app/org.mozilla.firefox/.mozilla/firefox',
|
||||
# Snap installations do not respect the XDG base directory specification
|
||||
'~/snap/firefox/common/.mozilla/firefox',
|
||||
))
|
||||
|
||||
|
||||
@@ -557,7 +566,7 @@ def decrypt(self, encrypted_value):
|
||||
|
||||
|
||||
def _extract_safari_cookies(profile, logger):
|
||||
if sys.platform != 'darwin':
|
||||
if sys.platform not in ('darwin', 'ios'):
|
||||
raise ValueError(f'unsupported platform: {sys.platform}')
|
||||
|
||||
if profile:
|
||||
|
||||
@@ -81,6 +81,12 @@
|
||||
|
||||
from . import Cryptodome
|
||||
|
||||
try:
|
||||
import yt_dlp_ejs
|
||||
except ImportError:
|
||||
yt_dlp_ejs = None
|
||||
|
||||
|
||||
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
|
||||
available_dependencies = {k: v for k, v in all_dependencies.items() if v}
|
||||
|
||||
|
||||
@@ -461,7 +461,8 @@ def download(self, filename, info_dict, subtitle=False):
|
||||
min_sleep_interval = self.params.get('sleep_interval') or 0
|
||||
max_sleep_interval = self.params.get('max_sleep_interval') or 0
|
||||
|
||||
if available_at := info_dict.get('available_at'):
|
||||
requested_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
if available_at := max(f.get('available_at') or 0 for f in requested_formats):
|
||||
forced_sleep_interval = available_at - int(time.time())
|
||||
if forced_sleep_interval > min_sleep_interval:
|
||||
sleep_note = 'as required by the site'
|
||||
|
||||
@@ -457,6 +457,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
# Fixme: This may be wrong when --ffmpeg-location is used
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
@@ -488,20 +490,6 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||
|
||||
# These exists only for compatibility. Extractors should use
|
||||
# info_dict['downloader_options']['ffmpeg_args'] instead
|
||||
args += info_dict.get('_ffmpeg_args') or []
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
# supports seeking(by adding the header `Range: bytes=0-`), which
|
||||
# can cause problems in some cases
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
@@ -521,39 +509,10 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
env['HTTP_PROXY'] = proxy
|
||||
env['http_proxy'] = proxy
|
||||
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
if protocol == 'rtmp':
|
||||
player_url = info_dict.get('player_url')
|
||||
page_url = info_dict.get('page_url')
|
||||
app = info_dict.get('app')
|
||||
play_path = info_dict.get('play_path')
|
||||
tc_url = info_dict.get('tc_url')
|
||||
flash_version = info_dict.get('flash_version')
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn')
|
||||
if player_url is not None:
|
||||
args += ['-rtmp_swfverify', player_url]
|
||||
if page_url is not None:
|
||||
args += ['-rtmp_pageurl', page_url]
|
||||
if app is not None:
|
||||
args += ['-rtmp_app', app]
|
||||
if play_path is not None:
|
||||
args += ['-rtmp_playpath', play_path]
|
||||
if tc_url is not None:
|
||||
args += ['-rtmp_tcurl', tc_url]
|
||||
if flash_version is not None:
|
||||
args += ['-rtmp_flashver', flash_version]
|
||||
if live:
|
||||
args += ['-rtmp_live', 'live']
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
args += ['-rtmp_conn', entry]
|
||||
elif isinstance(conn, str):
|
||||
args += ['-rtmp_conn', conn]
|
||||
|
||||
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
|
||||
|
||||
fallback_input_args = traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
is_http = re.match(r'https?://', fmt['url'])
|
||||
@@ -572,6 +531,44 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
protocol = fmt.get('protocol')
|
||||
|
||||
if protocol == 'rtmp':
|
||||
player_url = fmt.get('player_url')
|
||||
page_url = fmt.get('page_url')
|
||||
app = fmt.get('app')
|
||||
play_path = fmt.get('play_path')
|
||||
tc_url = fmt.get('tc_url')
|
||||
flash_version = fmt.get('flash_version')
|
||||
live = fmt.get('rtmp_live', False)
|
||||
conn = fmt.get('rtmp_conn')
|
||||
if player_url is not None:
|
||||
args += ['-rtmp_swfverify', player_url]
|
||||
if page_url is not None:
|
||||
args += ['-rtmp_pageurl', page_url]
|
||||
if app is not None:
|
||||
args += ['-rtmp_app', app]
|
||||
if play_path is not None:
|
||||
args += ['-rtmp_playpath', play_path]
|
||||
if tc_url is not None:
|
||||
args += ['-rtmp_tcurl', tc_url]
|
||||
if flash_version is not None:
|
||||
args += ['-rtmp_flashver', flash_version]
|
||||
if live:
|
||||
args += ['-rtmp_live', 'live']
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
args += ['-rtmp_conn', entry]
|
||||
elif isinstance(conn, str):
|
||||
args += ['-rtmp_conn', conn]
|
||||
|
||||
elif protocol == 'http_dash_segments' and info_dict.get('is_live'):
|
||||
# ffmpeg may try to read past the latest available segments for
|
||||
# live DASH streams unless we pass `-re`. In modern ffmpeg, this
|
||||
# is an alias of `-readrate 1`, but `-readrate` was not added
|
||||
# until ffmpeg 5.0, so we must stick to using `-re`
|
||||
args += ['-re']
|
||||
|
||||
url = fmt['url']
|
||||
if self.params.get('enable_file_urls') and url.startswith('file:'):
|
||||
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
|
||||
@@ -586,6 +583,7 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
# https://trac.ffmpeg.org/ticket/2702
|
||||
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
|
||||
|
||||
args += traverse_obj(fmt, ('downloader_options', 'ffmpeg_args', ...)) or fallback_input_args
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
|
||||
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
|
||||
@@ -75,6 +75,7 @@
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
)
|
||||
from .agalega import AGalegaIE
|
||||
from .agora import (
|
||||
TokFMAuditionIE,
|
||||
TokFMPodcastIE,
|
||||
@@ -83,6 +84,7 @@
|
||||
)
|
||||
from .airtv import AirTVIE
|
||||
from .aitube import AitubeKZVideoIE
|
||||
from .alibaba import AlibabaIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .allocine import AllocineIE
|
||||
@@ -143,6 +145,8 @@
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDAudiothekIE,
|
||||
ARDAudiothekPlaylistIE,
|
||||
ARDBetaMediathekIE,
|
||||
ARDMediathekCollectionIE,
|
||||
)
|
||||
@@ -266,6 +270,7 @@
|
||||
BitChuteChannelIE,
|
||||
BitChuteIE,
|
||||
)
|
||||
from .bitmovin import BitmovinIE
|
||||
from .blackboardcollaborate import (
|
||||
BlackboardCollaborateIE,
|
||||
BlackboardCollaborateLaunchIE,
|
||||
@@ -638,7 +643,10 @@
|
||||
FilmOnIE,
|
||||
)
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .firsttv import (
|
||||
FirstTVIE,
|
||||
FirstTVLiveIE,
|
||||
)
|
||||
from .fivetv import FiveTVIE
|
||||
from .flextv import FlexTVIE
|
||||
from .flickr import FlickrIE
|
||||
@@ -685,6 +693,10 @@
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .frontro import (
|
||||
TheChosenGroupIE,
|
||||
TheChosenIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
@@ -1088,7 +1100,10 @@
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mave import MaveIE
|
||||
from .mave import (
|
||||
MaveChannelIE,
|
||||
MaveIE,
|
||||
)
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
@@ -1195,6 +1210,7 @@
|
||||
MusicdexPlaylistIE,
|
||||
MusicdexSongIE,
|
||||
)
|
||||
from .mux import MuxIE
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
Mx3NeoIE,
|
||||
@@ -1216,6 +1232,7 @@
|
||||
N1InfoAssetIE,
|
||||
N1InfoIIE,
|
||||
)
|
||||
from .nascar import NascarClassicsIE
|
||||
from .nate import (
|
||||
NateIE,
|
||||
NateProgramIE,
|
||||
@@ -1269,6 +1286,10 @@
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
)
|
||||
from .netapp import (
|
||||
NetAppCollectionIE,
|
||||
NetAppVideoIE,
|
||||
)
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
@@ -1361,6 +1382,7 @@
|
||||
NovaIE,
|
||||
)
|
||||
from .novaplay import NovaPlayIE
|
||||
from .nowcanal import NowCanalIE
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
@@ -2514,6 +2536,7 @@
|
||||
YappyIE,
|
||||
YappyProfileIE,
|
||||
)
|
||||
from .yfanefa import YfanefaIE
|
||||
from .yle_areena import YleAreenaIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import (
|
||||
|
||||
@@ -321,6 +321,8 @@ def tokenize_url(url, token):
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
else:
|
||||
formats = []
|
||||
|
||||
subtitles = {}
|
||||
src_vtt = stream.get('captions', {}).get('src-vtt')
|
||||
|
||||
91
yt_dlp/extractor/agalega.py
Normal file
91
yt_dlp/extractor/agalega.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import jwt_decode_hs256, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AGalegaBaseIE(InfoExtractor):
    # Class-level cache: the anonymous JWT is shared by all AGalega
    # extractor instances for the lifetime of the process
    _access_token = None

    @staticmethod
    def _jwt_is_expired(token):
        # Treat tokens within 120 s of their 'exp' claim as expired to
        # allow for clock skew and request latency
        return jwt_decode_hs256(token)['exp'] - time.time() < 120

    def _refresh_access_token(self, video_id):
        # Anonymous login: null credentials yield a guest token for the
        # 'crtvg' client; stored on the base class so siblings reuse it
        AGalegaBaseIE._access_token = self._download_json(
            'https://www.agalega.gal/api/fetch-api/jwt/token', video_id,
            note='Downloading access token',
            data=json.dumps({
                'username': None,
                'password': None,
                'client': 'crtvg',
                'checkExistsCookies': False,
            }).encode())['access']

    def _call_api(self, endpoint, display_id, note, fatal=True, query=None):
        """Query the contents API, lazily fetching/refreshing the JWT first."""
        if not AGalegaBaseIE._access_token or self._jwt_is_expired(AGalegaBaseIE._access_token):
            self._refresh_access_token(endpoint)
        return self._download_json(
            f'https://api-agalega.interactvty.com/api/2.0/contents/{endpoint}', display_id,
            note=note, fatal=fatal, query=query,
            headers={'Authorization': f'jwtok {AGalegaBaseIE._access_token}'})
|
||||
|
||||
|
||||
class AGalegaIE(AGalegaBaseIE):
    IE_NAME = 'agalega:videos'
    _VALID_URL = r'https?://(?:www\.)?agalega\.gal/videos/(?:detail/)?(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.agalega.gal/videos/288664-lr-ninguencheconta',
        'md5': '04533a66c5f863d08dd9724b11d1c223',
        'info_dict': {
            'id': '288664',
            'title': 'Roberto e Ángel Martín atenden consultas dos espectadores',
            'description': 'O cómico ademais fai un repaso dalgúns momentos da súa traxectoria profesional',
            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/2ef32c3b9f6249d9868fd8f11d389d3d.png',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.agalega.gal/videos/detail/296152-pulso-activo-7',
        'md5': '26df7fdcf859f38ad92d837279d6b56d',
        'info_dict': {
            'id': '296152',
            'title': 'Pulso activo | 18-11-2025',
            'description': 'Anxo, Noemí, Silvia e Estrella comparten as sensacións da clase de Eddy.',
            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/a6bb7da6c8994b82bf961ac6cad1707b.png',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Metadata (title/description/thumbnail); non-fatal so extraction
        # can still proceed on the media resources alone
        content_data = self._call_api(
            f'content/{video_id}/', video_id, note='Downloading content data', fatal=False,
            query={
                'optional_fields': 'image,is_premium,short_description,has_subtitle',
            })
        # Media resources carry the HLS manifest URL(s)
        resource_data = self._call_api(
            f'content_resources/{video_id}/', video_id, note='Downloading resource data',
            query={
                'optional_fields': 'media_url',
            })

        formats = []
        subtitles = {}
        for m3u8_url in traverse_obj(resource_data, ('results', ..., 'media_url', {url_or_none})):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, ext='mp4', m3u8_id='hls')
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Prefer the full description, falling back to the short one
            **traverse_obj(content_data, {
                'title': ('name', {str}),
                'description': (('description', 'short_description'), {str}, any),
                'thumbnail': ('image', {url_or_none}),
            }),
        }
|
||||
42
yt_dlp/extractor/alibaba.py
Normal file
42
yt_dlp/extractor/alibaba.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, str_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AlibabaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?alibaba\.com/product-detail/[\w-]+_(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.alibaba.com/product-detail/Kids-Entertainment-Bouncer-Bouncy-Castle-Waterslide_1601271126969.html',
|
||||
'info_dict': {
|
||||
'id': '6000280444270',
|
||||
'display_id': '1601271126969',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kids Entertainment Bouncer Bouncy Castle Waterslide Juex Gonflables Commercial Inflatable Tropical Water Slide',
|
||||
'duration': 30,
|
||||
'thumbnail': 'https://sc04.alicdn.com/kf/Hc5bb391974454af18c7a4f91cbe4062bg.jpg_120x120.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
product_data = self._search_json(
|
||||
r'window\.detailData\s*=', webpage, 'detail data', display_id)['globalData']['product']
|
||||
|
||||
return {
|
||||
**traverse_obj(product_data, ('mediaItems', lambda _, v: v['type'] == 'video' and v['videoId'], any, {
|
||||
'id': ('videoId', {int}, {str_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('videoCoverUrl', {url_or_none}),
|
||||
'formats': ('videoUrl', lambda _, v: url_or_none(v['videoUrl']), {
|
||||
'url': 'videoUrl',
|
||||
'format_id': ('definition', {str_or_none}),
|
||||
'tbr': ('bitrate', {int_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': ('length', {int_or_none}),
|
||||
}),
|
||||
})),
|
||||
'title': traverse_obj(product_data, ('subject', {str})),
|
||||
'display_id': display_id,
|
||||
}
|
||||
@@ -5,12 +5,9 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from .youtube import YoutubeBaseInfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
dict_get,
|
||||
@@ -21,18 +18,14 @@
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -471,7 +464,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
'info_dict': {
|
||||
'id': 'lTx3G6h2xyA',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madeon - Pop Culture (live mashup)',
|
||||
'upload_date': '20110711',
|
||||
'uploader': 'Madeon',
|
||||
@@ -578,7 +571,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
'info_dict': {
|
||||
'id': 'Q_yjX80U7Yc',
|
||||
'ext': 'flv',
|
||||
'ext': 'webm',
|
||||
'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
|
||||
'uploader_id': 'claybutlermusic',
|
||||
'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
|
||||
@@ -680,6 +673,55 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
},
|
||||
}, {
|
||||
# Contains split audio/video formats
|
||||
'url': 'ytarchive:o_T_S_TU12M',
|
||||
'info_dict': {
|
||||
'id': 'o_T_S_TU12M',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prairie Pulse 1218; Lin Enger, Paul Olson',
|
||||
'description': 'md5:36e7a34cdc8508e35a920ec042e799c7',
|
||||
'uploader': 'Prairie Public',
|
||||
'channel_id': 'UC4BOzQel6tvJm7OEDd3vZlw',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4BOzQel6tvJm7OEDd3vZlw',
|
||||
'duration': 1606,
|
||||
'upload_date': '20150213',
|
||||
},
|
||||
}, {
|
||||
# Video unavailable through wayback-fakeurl
|
||||
'url': 'ytarchive:SQCom7wjGDs',
|
||||
'info_dict': {
|
||||
'id': 'SQCom7wjGDs',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jamin Warren from PBS Game/Show decides that Portal is a feminist Game [Top Hats and No Brain]',
|
||||
'description': 'md5:c0cb876dd075483ead9afcc86798efb0',
|
||||
'uploader': 'Top Hats and Champagne',
|
||||
'uploader_id': 'sparrowtm',
|
||||
'uploader_url': 'https://www.youtube.com/user/sparrowtm',
|
||||
'channel_id': 'UCW3T5nG4iEkI7HjG-Du3HQA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCW3T5nG4iEkI7HjG-Du3HQA',
|
||||
'duration': 1500,
|
||||
'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
|
||||
'upload_date': '20160107',
|
||||
},
|
||||
}, {
|
||||
# dmuxed formats
|
||||
'url': 'https://web.archive.org/web/20240922160632/https://www.youtube.com/watch?v=z7hzvTL3k1k',
|
||||
'info_dict': {
|
||||
'id': 'z7hzvTL3k1k',
|
||||
'ext': 'webm',
|
||||
'title': 'Praise the Lord and Pass the Ammunition (BARRXN REMIX)',
|
||||
'description': 'md5:45dbf2c71c23b0734c8dfb82dd1e94b6',
|
||||
'uploader': 'Barrxn',
|
||||
'uploader_id': 'TheRockstar6086',
|
||||
'uploader_url': 'https://www.youtube.com/user/TheRockstar6086',
|
||||
'channel_id': 'UCjJPGUTtvR9uizmawn2ThqA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCjJPGUTtvR9uizmawn2ThqA',
|
||||
'duration': 125,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20201207',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True,
|
||||
@@ -724,6 +766,113 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
_FORMATS = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'vcodec': 'h263'},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'vcodec': 'h263'},
|
||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
||||
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
|
||||
|
||||
# 3D videos
|
||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||
|
||||
# Apple HTTP Live Streaming
|
||||
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'138': {'ext': 'mp4', 'vcodec': 'h264', 'acodec': 'none'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'212': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'vcodec': 'h264', 'acodec': 'none'},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||
'140': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||
'141': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||
'256': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||
'258': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||
'325': {'ext': 'm4a', 'acodec': 'dtse', 'vcodec': 'none'},
|
||||
'328': {'ext': 'm4a', 'acodec': 'ec-3', 'vcodec': 'none'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'vcodec': 'vp8'},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'vcodec': 'vp8'},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'vcodec': 'vp8'},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||
'278': {'ext': 'webm', 'height': 144, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'242': {'ext': 'webm', 'height': 240, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'243': {'ext': 'webm', 'height': 360, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'244': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'245': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'246': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'247': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||
'272': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'302': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
|
||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
|
||||
|
||||
# Dash webm audio with opus inside
|
||||
'249': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||
'250': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||
'251': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||
|
||||
# av01 video only formats sometimes served with "unknown" codecs
|
||||
'394': {'ext': 'mp4', 'height': 144, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
|
||||
'395': {'ext': 'mp4', 'height': 240, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
|
||||
'396': {'ext': 'mp4', 'height': 360, 'vcodec': 'av01.0.01M.08', 'acodec': 'none'},
|
||||
'397': {'ext': 'mp4', 'height': 480, 'vcodec': 'av01.0.04M.08', 'acodec': 'none'},
|
||||
'398': {'ext': 'mp4', 'height': 720, 'vcodec': 'av01.0.05M.08', 'acodec': 'none'},
|
||||
'399': {'ext': 'mp4', 'height': 1080, 'vcodec': 'av01.0.08M.08', 'acodec': 'none'},
|
||||
'400': {'ext': 'mp4', 'height': 1440, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
|
||||
'401': {'ext': 'mp4', 'height': 2160, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
|
||||
}
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
@@ -929,27 +1078,30 @@ def _get_capture_dates(self, video_id, url_date):
|
||||
capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
|
||||
return orderedSet(filter(None, capture_dates))
|
||||
|
||||
def _parse_fmt(self, fmt, extra_info=None):
|
||||
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
||||
return {
|
||||
'format_id': format_id,
|
||||
**self._FORMATS.get(format_id, {}),
|
||||
**traverse_obj(fmt, {
|
||||
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
||||
'ext': ('ext', {str}),
|
||||
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
||||
}),
|
||||
**(extra_info or {}),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
||||
url_date = url_date or url_date_2
|
||||
|
||||
urlh = None
|
||||
retry_manager = self.RetryManager(fatal=False)
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
||||
else:
|
||||
retry.error = e
|
||||
video_info = self._download_json(
|
||||
'https://web.archive.org/__wb/videoinfo', video_id,
|
||||
query={'vtype': 'youtube', 'vid': video_id})
|
||||
|
||||
if retry_manager.error:
|
||||
self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
|
||||
if not traverse_obj(video_info, 'formats'):
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived or indexed', expected=True)
|
||||
|
||||
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
|
||||
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
|
||||
@@ -968,25 +1120,15 @@ def _real_extract(self, url):
|
||||
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = urllib.parse.unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
||||
if itag and itag in YoutubeIE._formats:
|
||||
fmt.update(YoutubeIE._formats[itag])
|
||||
fmt.update({'format_id': itag})
|
||||
else:
|
||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
||||
ext = (mimetype2ext(mime)
|
||||
or urlhandle_detect_ext(urlh)
|
||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
||||
fmt.update({'ext': ext})
|
||||
info['formats'] = [fmt]
|
||||
if not info.get('duration'):
|
||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
||||
formats = []
|
||||
if video_info.get('dmux'):
|
||||
for vf in traverse_obj(video_info, ('formats', 'video', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(vf, {'acodec': 'none'}))
|
||||
for af in traverse_obj(video_info, ('formats', 'audio', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(af, {'vcodec': 'none'}))
|
||||
else:
|
||||
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(fmt))
|
||||
info['formats'] = formats
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -15,11 +16,12 @@
|
||||
remove_start,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
@@ -601,3 +603,163 @@ def fetch_page(page_num):
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
|
||||
title=page_data.get('title'), description=page_data.get('synopsis'))
|
||||
|
||||
|
||||
class ARDAudiothekBaseIE(InfoExtractor):
|
||||
def _graphql_query(self, urn, query):
|
||||
return self._download_json(
|
||||
'https://api.ardaudiothek.de/graphql', urn,
|
||||
data=json.dumps({
|
||||
'query': query,
|
||||
'variables': {'id': urn},
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['data']
|
||||
|
||||
|
||||
class ARDAudiothekIE(ARDAudiothekBaseIE):
|
||||
_VALID_URL = r'https:?//(?:www\.)?ardaudiothek\.de/episode/(?P<id>urn:ard:(?:episode|section|extra):[a-f0-9]{16})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardaudiothek.de/episode/urn:ard:episode:eabead1add170e93/',
|
||||
'info_dict': {
|
||||
'id': 'urn:ard:episode:eabead1add170e93',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20240717',
|
||||
'duration': 3339,
|
||||
'title': 'CAIMAN CLUB (S04E04): Cash Out',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:ed64411a07a4b405',
|
||||
'description': 'md5:0e5d127a3832ae59e8bab40a91a5dadc',
|
||||
'display_id': 'urn:ard:episode:eabead1add170e93',
|
||||
'timestamp': 1721181641,
|
||||
'series': '1LIVE Caiman Club',
|
||||
'channel': 'WDR',
|
||||
'episode': 'Episode 4',
|
||||
'episode_number': 4,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardaudiothek.de/episode/urn:ard:section:855c7a53dac72e0a/',
|
||||
'info_dict': {
|
||||
'id': 'urn:ard:section:855c7a53dac72e0a',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241231',
|
||||
'duration': 3304,
|
||||
'title': 'Illegaler DDR-Detektiv: Doberschütz und die letzte Staatsjagd (1/2) - Wendezeit',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:b9b4f1e8b93da4dd',
|
||||
'description': 'md5:3552d571e1959754cff66c1da6c0fdae',
|
||||
'display_id': 'urn:ard:section:855c7a53dac72e0a',
|
||||
'timestamp': 1735629900,
|
||||
'series': 'Auf der Spur – Die ARD Ermittlerkrimis',
|
||||
'channel': 'ARD',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardaudiothek.de/episode/urn:ard:extra:d2fe7303d2dcbf5d/',
|
||||
'info_dict': {
|
||||
'id': 'urn:ard:extra:d2fe7303d2dcbf5d',
|
||||
'ext': 'mp3',
|
||||
'title': 'Trailer: Fanta Vier Forever, Baby!?!',
|
||||
'description': 'md5:b64a586f2e976b8bb5ea0a79dbd8751c',
|
||||
'channel': 'SWR',
|
||||
'duration': 62,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:48d3c255969be803',
|
||||
'series': 'Fanta Vier Forever, Baby!?!',
|
||||
'timestamp': 1732108217,
|
||||
'upload_date': '20241120',
|
||||
},
|
||||
}]
|
||||
|
||||
_QUERY_ITEM = '''\
|
||||
query($id: ID!) {
|
||||
item(id: $id) {
|
||||
audioList {
|
||||
href
|
||||
distributionType
|
||||
audioBitrate
|
||||
audioCodec
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
image {
|
||||
url1X1
|
||||
}
|
||||
programSet {
|
||||
publicationService {
|
||||
organizationName
|
||||
}
|
||||
}
|
||||
description
|
||||
title
|
||||
duration
|
||||
startDate
|
||||
episodeNumber
|
||||
}
|
||||
}'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
urn = self._match_id(url)
|
||||
item = self._graphql_query(urn, self._QUERY_ITEM)['item']
|
||||
return {
|
||||
'id': urn,
|
||||
**traverse_obj(item, {
|
||||
'formats': ('audioList', lambda _, v: url_or_none(v['href']), {
|
||||
'url': 'href',
|
||||
'format_id': ('distributionType', {str}),
|
||||
'abr': ('audioBitrate', {int_or_none}),
|
||||
'acodec': ('audioCodec', {str}),
|
||||
'vcodec': {value('none')},
|
||||
}),
|
||||
'channel': ('programSet', 'publicationService', 'organizationName', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'series': ('show', 'title', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'thumbnail': ('image', 'url1X1', {url_or_none}, {update_url(query=None)}),
|
||||
'timestamp': ('startDate', {parse_iso8601}),
|
||||
'title': ('title', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class ARDAudiothekPlaylistIE(ARDAudiothekBaseIE):
|
||||
_VALID_URL = r'https:?//(?:www\.)?ardaudiothek\.de/sendung/(?P<playlist>[\w-]+)/(?P<id>urn:ard:show:[a-f0-9]{16})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardaudiothek.de/sendung/mia-insomnia/urn:ard:show:c405aa26d9a4060a/',
|
||||
'info_dict': {
|
||||
'display_id': 'mia-insomnia',
|
||||
'title': 'Mia Insomnia',
|
||||
'id': 'urn:ard:show:c405aa26d9a4060a',
|
||||
'description': 'md5:d9ceb7a6b4d26a4db3316573bb564292',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
}, {
|
||||
'url': 'https://www.ardaudiothek.de/sendung/100-berlin/urn:ard:show:4d248e0806ce37bc/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_QUERY_PLAYLIST = '''
|
||||
query($id: ID!) {
|
||||
show(id: $id) {
|
||||
title
|
||||
description
|
||||
items(filter: { isPublished: { equalTo: true } }) {
|
||||
nodes {
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
urn, playlist = self._match_valid_url(url).group('id', 'playlist')
|
||||
playlist_info = self._graphql_query(urn, self._QUERY_PLAYLIST)['show']
|
||||
entries = []
|
||||
for url in traverse_obj(playlist_info, ('items', 'nodes', ..., 'url', {url_or_none})):
|
||||
entries.append(self.url_result(url, ie=ARDAudiothekIE))
|
||||
return self.playlist_result(entries, urn, display_id=playlist, **traverse_obj(playlist_info, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
}))
|
||||
|
||||
74
yt_dlp/extractor/bitmovin.py
Normal file
74
yt_dlp/extractor/bitmovin.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BitmovinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://streams\.bitmovin\.com/(?P<id>\w+)'
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//streams\.bitmovin\.com/(?P<id>\w+)[^"\']+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/embed',
|
||||
'info_dict': {
|
||||
'id': 'cqkl1t5giv3lrce7pjbg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developing Osteopathic Residents as Faculty',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/share',
|
||||
'info_dict': {
|
||||
'id': 'cgl9rh94uvs51rqc8jhg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny (Streams Docs)',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# bitmovin-stream web component
|
||||
'url': 'https://www.institutionalinvestor.com/article/2bsw1in1l9k68mp9kritc/video-war-stories-over-board-games/best-case-i-get-fired-war-stories',
|
||||
'info_dict': {
|
||||
'id': 'cuiumeil6g115lc4li3g',
|
||||
'ext': 'mp4',
|
||||
'title': '[media] War Stories over Board Games: “Best Case: I Get Fired” ',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cuiumeil6g115lc4li3g/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'https://www.clearblueionizer.com/en/pool-ionizers/mineral-pool-vs-saltwater-pool/',
|
||||
'info_dict': {
|
||||
'id': 'cvpvfsm1pf7itg7cfvtg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pool Ionizer vs. Salt Chlorinator',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cvpvfsm1pf7itg7cfvtg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for stream_id in re.findall(r'<bitmovin-stream\b[^>]*\bstream-id=["\'](?P<id>\w+)', webpage):
|
||||
yield f'https://streams.bitmovin.com/{stream_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_config = self._download_json(
|
||||
f'https://streams.bitmovin.com/{video_id}/config', video_id)['sources']
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
player_config['hls'], video_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(player_config, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('poster', {str}),
|
||||
}),
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
|
||||
class BunnyCdnIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:iframe\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
|
||||
_VALID_URL = r'https?://(?:(?:iframe|player)\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL}[^\'"]*)[\'"]']
|
||||
_TESTS = [{
|
||||
'url': 'https://iframe.mediadelivery.net/embed/113933/e73edec1-e381-4c8b-ae73-717a140e0924',
|
||||
@@ -39,7 +39,7 @@ class BunnyCdnIE(InfoExtractor):
|
||||
'timestamp': 1691145748,
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/32e34c4b-0d72-437c-9abb-05e67657da34/thumbnail_9172dc16\.jpg',
|
||||
'duration': 106.0,
|
||||
'description': 'md5:981a3e899a5c78352b21ed8b2f1efd81',
|
||||
'description': 'md5:11452bcb31f379ee3eaf1234d3264e44',
|
||||
'upload_date': '20230804',
|
||||
'title': 'Sanela ist Teil der #arbeitsmarktkraft',
|
||||
},
|
||||
@@ -58,6 +58,23 @@ class BunnyCdnIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
# Requires any Referer
|
||||
'url': 'https://iframe.mediadelivery.net/embed/289162/6372f5a3-68df-4ef7-a115-e1110186c477',
|
||||
'info_dict': {
|
||||
'id': '6372f5a3-68df-4ef7-a115-e1110186c477',
|
||||
'ext': 'mp4',
|
||||
'title': '12-Creating Small Asset Blockouts -Timelapse.mp4',
|
||||
'description': '',
|
||||
'duration': 263.0,
|
||||
'timestamp': 1724485440,
|
||||
'upload_date': '20240824',
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/6372f5a3-68df-4ef7-a115-e1110186c477/thumbnail\.jpg',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'url': 'https://player.mediadelivery.net/embed/519128/875880a9-bcc2-4038-9e05-e5024bba9b70',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# Stream requires Referer
|
||||
@@ -100,7 +117,7 @@ def _real_extract(self, url):
|
||||
video_id, library_id = self._match_valid_url(url).group('id', 'library_id')
|
||||
webpage = self._download_webpage(
|
||||
f'https://iframe.mediadelivery.net/embed/{library_id}/{video_id}', video_id,
|
||||
headers=traverse_obj(smuggled_data, {'Referer': 'Referer'}),
|
||||
headers={'Referer': smuggled_data.get('Referer') or 'https://iframe.mediadelivery.net/'},
|
||||
query=traverse_obj(parse_qs(url), {'token': 'token', 'expires': 'expires'}))
|
||||
|
||||
if html_title := self._html_extract_title(webpage, default=None) == '403':
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import int_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DigitekaIE(InfoExtractor):
|
||||
@@ -25,74 +26,56 @@ class DigitekaIE(InfoExtractor):
|
||||
)/(?P<id>[\d+a-z]+)'''
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
|
||||
_TESTS = [{
|
||||
# news
|
||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
|
||||
'md5': '276a0e49de58c7e85d32b057837952a2',
|
||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/3x5x55k',
|
||||
'info_dict': {
|
||||
'id': 's8uk0r',
|
||||
'id': '3x5x55k',
|
||||
'ext': 'mp4',
|
||||
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
||||
'title': 'Il est passionné de DS',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 74,
|
||||
'upload_date': '20150317',
|
||||
'timestamp': 1426604939,
|
||||
'uploader_id': '3fszv',
|
||||
'duration': 89,
|
||||
'upload_date': '20251012',
|
||||
'timestamp': 1760285363,
|
||||
'uploader_id': '3pz33',
|
||||
},
|
||||
}, {
|
||||
# music
|
||||
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
|
||||
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
||||
'info_dict': {
|
||||
'id': 'xvpfp8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Two - C\'est La Vie (clip)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 233,
|
||||
'upload_date': '20150224',
|
||||
'timestamp': 1424760500,
|
||||
'uploader_id': '3rfzk',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
|
||||
'only_matching': True,
|
||||
'params': {'skip_download': True},
|
||||
}]
|
||||
_IFRAME_MD_ID = '01836272' # One static ID working for Ultimedia iframes
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('embed_type') or mobj.group('site_type')
|
||||
if video_type == 'music':
|
||||
video_type = 'musique'
|
||||
video_id = self._match_id(url)
|
||||
|
||||
deliver_info = self._download_json(
|
||||
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
||||
video_id)
|
||||
|
||||
yt_id = deliver_info.get('yt_id')
|
||||
if yt_id:
|
||||
return self.url_result(yt_id, 'Youtube')
|
||||
|
||||
jwconf = deliver_info['jwconf']
|
||||
video_info = self._download_json(
|
||||
f'https://www.ultimedia.com/player/getConf/{self._IFRAME_MD_ID}/1/{video_id}', video_id,
|
||||
note='Downloading player configuration')['video']
|
||||
|
||||
formats = []
|
||||
for source in jwconf['playlist'][0]['sources']:
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'format_id': source.get('label'),
|
||||
})
|
||||
subtitles = {}
|
||||
|
||||
title = deliver_info['title']
|
||||
thumbnail = jwconf.get('image')
|
||||
duration = int_or_none(deliver_info.get('duration'))
|
||||
timestamp = int_or_none(deliver_info.get('release_time'))
|
||||
uploader_id = deliver_info.get('owner_id')
|
||||
if hls_url := traverse_obj(video_info, ('media_sources', 'hls', 'hls_auto', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
for format_id, mp4_url in traverse_obj(video_info, ('media_sources', 'mp4', {dict.items}, ...)):
|
||||
if not mp4_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id.partition('_')[2]),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('creationDate', {int_or_none}),
|
||||
'uploader_id': ('ownerId', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DPlayBaseIE(InfoExtractor):
|
||||
@@ -1053,7 +1054,7 @@ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de)/(?:programme|show|sendungen)/(?P<programme>[^/?#]+)/(?:video/)?(?P<alternate_id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
|
||||
@@ -1074,6 +1075,7 @@ class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
'creators': ['DMAX'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
|
||||
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
|
||||
'categories': ['Gold', 'Schatzsucher'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -1100,20 +1102,96 @@ class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dmax.de/sendungen/feuerwache-3-alarm-in-muenchen/24-stunden-auf-der-feuerwache-3',
|
||||
'info_dict': {
|
||||
'id': '8873549',
|
||||
'ext': 'mp4',
|
||||
'title': '24 Stunden auf der Feuerwache 3',
|
||||
'description': 'md5:f3084ef6170bfb79f9a6e0c030e09330',
|
||||
'display_id': 'feuerwache-3-alarm-in-muenchen/24-stunden-auf-der-feuerwache-3',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Feuerwache 3 - Alarm in München',
|
||||
'duration': 2632.0,
|
||||
'upload_date': '20251016',
|
||||
'timestamp': 1760645100,
|
||||
'creators': ['DMAX'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2025/10/14/0bdee68c-a8d8-33d9-9204-16eb61108552.jpeg',
|
||||
'tags': [],
|
||||
'categories': ['DMAX Originals', 'Jobs', 'Blaulicht'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/ghost-adventures/der-poltergeist-im-kostumladen',
|
||||
'info_dict': {
|
||||
'id': '4550602',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Poltergeist im Kostümladen',
|
||||
'description': 'md5:20b52b9736a0a3a7873d19a238fad7fc',
|
||||
'display_id': 'ghost-adventures/der-poltergeist-im-kostumladen',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 25',
|
||||
'season_number': 25,
|
||||
'series': 'Ghost Adventures',
|
||||
'duration': 2493.0,
|
||||
'upload_date': '20241223',
|
||||
'timestamp': 1734948900,
|
||||
'creators': ['TLC'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/04/05/59941d26-a81b-365f-829f-69d8cd81fd0f.jpeg',
|
||||
'tags': [],
|
||||
'categories': ['Paranormal', 'Gruselig!'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/evil-gesichter-des-boesen/das-geheimnis-meines-bruders',
|
||||
'info_dict': {
|
||||
'id': '7792288',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das Geheimnis meines Bruders',
|
||||
'description': 'md5:3167550bb582eb9c92875c86a0a20882',
|
||||
'display_id': 'evil-gesichter-des-boesen/das-geheimnis-meines-bruders',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Evil - Gesichter des Bösen',
|
||||
'duration': 2626.0,
|
||||
'upload_date': '20240926',
|
||||
'timestamp': 1727388000,
|
||||
'creators': ['TLC'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/11/29/e9f3e3ae-74ec-3631-81b7-fc7bbe844741.jpeg',
|
||||
'tags': 'count:13',
|
||||
'categories': ['True Crime', 'Mord'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, programme, alternate_id = self._match_valid_url(url).groups()
|
||||
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||
return self._get_disco_api_info(
|
||||
url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
|
||||
display_id = f'{programme}/{alternate_id}'
|
||||
meta = self._download_json(
|
||||
f'https://de-api.loma-cms.com/feloma/videos/{alternate_id}/',
|
||||
display_id, query={
|
||||
'environment': domain.split('.')[0],
|
||||
'v': '2',
|
||||
'filter[show.slug]': programme,
|
||||
}, fatal=False)
|
||||
video_id = traverse_obj(meta, ('uid', {str}, {lambda s: s[-7:]})) or display_id
|
||||
|
||||
disco_api_info = self._get_disco_api_info(
|
||||
url, video_id, 'eu1-prod.disco-api.com', domain.replace('.', ''), 'DE')
|
||||
disco_api_info['display_id'] = display_id
|
||||
disco_api_info['categories'] = traverse_obj(meta, (
|
||||
'taxonomies', lambda _, v: v['category'] == 'genre', 'title', {str.strip}, filter, all, filter))
|
||||
|
||||
return disco_api_info
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@@ -205,6 +206,9 @@ def _real_extract(self, url):
|
||||
'client_app': 'browser_hls',
|
||||
'ipv6': '',
|
||||
}), headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
# A non-zero 'status' indicates the stream is not live, so check truthiness
|
||||
if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server:
|
||||
raise UserNotLive(video_id=video_id)
|
||||
self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
|
||||
|
||||
ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
@@ -129,3 +129,36 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(
|
||||
self._entries(items), display_id, self._og_search_title(webpage, default=None),
|
||||
thumbnail=self._og_search_thumbnail(webpage, default=None))
|
||||
|
||||
|
||||
class FirstTVLiveIE(InfoExtractor):
|
||||
IE_NAME = '1tv:live'
|
||||
IE_DESC = 'Первый канал (прямой эфир)'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/live'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.1tv.ru/live',
|
||||
'info_dict': {
|
||||
'id': 'live',
|
||||
'ext': 'mp4',
|
||||
'title': r're:ПЕРВЫЙ КАНАЛ ПРЯМОЙ ЭФИР СМОТРЕТЬ ОНЛАЙН \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'livestream'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = 'live'
|
||||
webpage = self._download_webpage(url, display_id, fatal=False)
|
||||
|
||||
streams_list = self._download_json('https://stream.1tv.ru/api/playlist/1tvch-v1_as_array.json', display_id)
|
||||
mpd_url = traverse_obj(streams_list, ('mpd', ..., {url_or_none}, any, {require('mpd url')}))
|
||||
# FFmpeg needs to be passed -re to not seek past live window. This is handled by core
|
||||
formats, _ = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, mpd_id='dash')
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': self._html_extract_title(webpage),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
@@ -6,15 +6,15 @@
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class FloatplaneBaseIE(InfoExtractor):
|
||||
@@ -50,37 +50,31 @@ def _real_extract(self, url):
|
||||
media_id = media['id']
|
||||
media_typ = media.get('type') or 'video'
|
||||
|
||||
metadata = self._download_json(
|
||||
f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id},
|
||||
note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
stream = self._download_json(
|
||||
f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={
|
||||
'type': 'vod' if media_typ == 'video' else 'aod',
|
||||
'guid': metadata['guid'],
|
||||
}, note=f'Downloading {media_typ} stream data',
|
||||
f'{self._BASE_URL}/api/v3/delivery/info', media_id,
|
||||
query={'scenario': 'onDemand', 'entityId': media_id},
|
||||
note=f'Downloading {media_typ} stream data',
|
||||
impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
path_template = traverse_obj(stream, ('resource', 'uri', {str}))
|
||||
metadata = self._download_json(
|
||||
f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id,
|
||||
f'Downloading {media_typ} metadata', query={'id': media_id},
|
||||
fatal=False, impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
def format_path(params):
|
||||
path = path_template
|
||||
for i, val in (params or {}).items():
|
||||
path = path.replace(f'{{qualityLevelParams.{i}}}', val)
|
||||
return path
|
||||
cdn_base_url = traverse_obj(stream, (
|
||||
'groups', 0, 'origins', ..., 'url', {url_or_none}, any, {require('cdn base url')}))
|
||||
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
format_id = traverse_obj(quality, ('name', {str}))
|
||||
for variant in traverse_obj(stream, ('groups', 0, 'variants', lambda _, v: v['url'])):
|
||||
format_url = urljoin(cdn_base_url, variant['url'])
|
||||
format_id = traverse_obj(variant, ('name', {str}))
|
||||
hls_aes = {}
|
||||
m3u8_data = None
|
||||
|
||||
# If we need impersonation for the API, then we need it for HLS keys too: extract in advance
|
||||
if self._IMPERSONATE_TARGET is not None:
|
||||
m3u8_data = self._download_webpage(
|
||||
url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
|
||||
format_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
|
||||
note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '),
|
||||
errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' '))
|
||||
if not m3u8_data:
|
||||
@@ -98,18 +92,34 @@ def format_path(params):
|
||||
hls_aes['key'] = urlh.read().hex()
|
||||
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
**traverse_obj(variant, {
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
'width': ('meta', 'video', 'width', {int_or_none}),
|
||||
'height': ('meta', 'video', 'height', {int_or_none}),
|
||||
'vcodec': ('meta', 'video', 'codec', {str}),
|
||||
'acodec': ('meta', 'audio', 'codec', {str}),
|
||||
'vbr': ('meta', 'video', 'bitrate', 'average', {int_or_none(scale=1000)}),
|
||||
'abr': ('meta', 'audio', 'bitrate', 'average', {int_or_none(scale=1000)}),
|
||||
'audio_channels': ('meta', 'audio', 'channelCount', {int_or_none}),
|
||||
'fps': ('meta', 'video', 'fps', {float_or_none}),
|
||||
}),
|
||||
**parse_codecs(quality.get('codecs')),
|
||||
'url': url,
|
||||
'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_url.partition('/chunk.m3u8')[0], 'mp4'),
|
||||
'format_id': format_id,
|
||||
'hls_media_playlist_data': m3u8_data,
|
||||
'hls_aes': hls_aes or None,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
for sub_data in traverse_obj(metadata, ('textTracks', lambda _, v: url_or_none(v['src']))):
|
||||
sub_lang = sub_data.get('language') or 'en'
|
||||
sub_entry = {'url': sub_data['src']}
|
||||
if sub_data.get('generated'):
|
||||
automatic_captions.setdefault(sub_lang, []).append(sub_entry)
|
||||
else:
|
||||
subtitles.setdefault(sub_lang, []).append(sub_entry)
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
@@ -119,6 +129,8 @@ def format_path(params):
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
|
||||
164
yt_dlp/extractor/frontro.py
Normal file
164
yt_dlp/extractor/frontro.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FrontoBaseIE(InfoExtractor):
|
||||
def _get_auth_headers(self, url):
|
||||
return traverse_obj(self._get_cookies(url), {
|
||||
'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}),
|
||||
})
|
||||
|
||||
|
||||
class FrontroVideoBaseIE(FrontoBaseIE):
|
||||
_CHANNEL_ID = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.frontrow.cc/query', video_id, data=json.dumps({
|
||||
'operationName': 'Video',
|
||||
'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
|
||||
'query': '''query Video($channelID: ID!, $videoID: ID!) {
|
||||
video(ChannelID: $channelID, VideoID: $videoID) {
|
||||
... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
|
||||
}
|
||||
}''',
|
||||
}).encode(), headers={
|
||||
'content-type': 'application/json',
|
||||
**self._get_auth_headers(url),
|
||||
})['data']['video']
|
||||
if not traverse_obj(metadata, 'hasAccess'):
|
||||
self.raise_login_required()
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class FrontroGroupBaseIE(FrontoBaseIE):
|
||||
_CHANNEL_ID = None
|
||||
_VIDEO_EXTRACTOR = None
|
||||
_VIDEO_URL_TMPL = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
|
||||
data=json.dumps({
|
||||
'operationName': 'PaginatedStaticPageContainer',
|
||||
'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
|
||||
'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
|
||||
pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
|
||||
... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
|
||||
id contentItem { ... on ItemVideo { videoItem: item {
|
||||
id
|
||||
}}}
|
||||
}}}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
}).encode(), headers={
|
||||
'content-type': 'application/json',
|
||||
**self._get_auth_headers(url),
|
||||
})['data']['pageContainer']
|
||||
|
||||
entries = []
|
||||
for video_id in traverse_obj(metadata, (
|
||||
'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}),
|
||||
):
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': group_id,
|
||||
'entries': entries,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class TheChosenIE(FrontroVideoBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/video/184683594325',
|
||||
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
||||
'info_dict': {
|
||||
'id': '184683594325',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 3 Episode 2: Two by Two',
|
||||
'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4212,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
|
||||
'timestamp': 1698954546,
|
||||
'upload_date': '20231102',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.thechosen.tv/video/184683596189',
|
||||
'md5': 'd581562f9d29ce82f5b7770415334151',
|
||||
'info_dict': {
|
||||
'id': '184683596189',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 4 Episode 8: Humble',
|
||||
'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 5092,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
|
||||
'timestamp': 1715019474,
|
||||
'upload_date': '20240506',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TheChosenGroupIE(FrontroGroupBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
_VIDEO_EXTRACTOR = TheChosenIE
|
||||
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/group/309237658592',
|
||||
'info_dict': {
|
||||
'id': '309237658592',
|
||||
'title': 'Season 3',
|
||||
'timestamp': 1746203969,
|
||||
'upload_date': '20250502',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
@@ -1,21 +1,20 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
filter_dict,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
parse_qs,
|
||||
try_get,
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class GoogleDriveIE(InfoExtractor):
|
||||
@@ -38,8 +37,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 45,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'duration': 45.069,
|
||||
'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
|
||||
},
|
||||
}, {
|
||||
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
|
||||
@@ -49,8 +48,29 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'ext': 'mp3',
|
||||
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
|
||||
'duration': 184,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'duration': 184.68,
|
||||
},
|
||||
}, {
|
||||
# Has subtitle track
|
||||
'url': 'https://drive.google.com/file/d/1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE/view',
|
||||
'md5': '05488c528da6ef737ec8c962bfa9724e',
|
||||
'info_dict': {
|
||||
'id': '1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE',
|
||||
'ext': 'mp4',
|
||||
'title': 'test.mp4',
|
||||
'duration': 9.999,
|
||||
'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
|
||||
},
|
||||
}, {
|
||||
# Has subtitle track with kind 'asr'
|
||||
'url': 'https://drive.google.com/file/d/1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev/view',
|
||||
'md5': 'ccae12d07f18b5988900b2c8b92801fc',
|
||||
'info_dict': {
|
||||
'id': '1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev',
|
||||
'ext': 'mp4',
|
||||
'title': 'LEE NA GYUNG-3410-VOICE_MESSAGE.mp4',
|
||||
'duration': 8.766,
|
||||
'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
|
||||
},
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
@@ -71,17 +91,6 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
**{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
|
||||
'50': 'm4a',
|
||||
}
|
||||
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
||||
_CAPTIONS_ENTRY_TAG = {
|
||||
'subtitles': 'track',
|
||||
'automatic_captions': 'target',
|
||||
}
|
||||
_caption_formats_ext = []
|
||||
_captions_xml = None
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
@@ -91,129 +100,73 @@ def _extract_embed_urls(cls, url, webpage):
|
||||
if mobj:
|
||||
yield 'https://drive.google.com/file/d/{}'.format(mobj.group('id'))
|
||||
|
||||
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
|
||||
if self._captions_xml:
|
||||
return
|
||||
self._captions_xml = self._download_xml(
|
||||
self._BASE_URL_CAPTIONS, video_id, query={
|
||||
'id': video_id,
|
||||
'vid': subtitles_id,
|
||||
'hl': hl,
|
||||
@staticmethod
|
||||
def _construct_subtitle_url(base_url, video_id, language, fmt, kind):
|
||||
return update_url_query(
|
||||
base_url, filter_dict({
|
||||
'hl': 'en-US',
|
||||
'v': video_id,
|
||||
'type': 'track',
|
||||
'lang': language,
|
||||
'fmt': fmt,
|
||||
'kind': kind,
|
||||
}))
|
||||
|
||||
def _get_subtitles(self, video_id, video_info):
|
||||
subtitles = {}
|
||||
timed_text_base_url = traverse_obj(video_info, ('timedTextDetails', 'timedTextBaseUrl', {url_or_none}))
|
||||
if not timed_text_base_url:
|
||||
return subtitles
|
||||
subtitle_data = self._download_xml(
|
||||
timed_text_base_url, video_id, 'Downloading subtitles XML', fatal=False, query={
|
||||
'hl': 'en-US',
|
||||
'type': 'list',
|
||||
'tlangs': '1',
|
||||
'fmts': '1',
|
||||
'vssids': '1',
|
||||
}, note='Downloading subtitles XML',
|
||||
errnote='Unable to download subtitles XML', fatal=False)
|
||||
if self._captions_xml:
|
||||
for f in self._captions_xml.findall('format'):
|
||||
if f.attrib.get('fmt_code') and not f.attrib.get('default'):
|
||||
self._caption_formats_ext.append(f.attrib['fmt_code'])
|
||||
|
||||
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
|
||||
origin_lang_code=None, origin_lang_name=None):
|
||||
if not subtitles_id or not caption_type:
|
||||
return
|
||||
captions = {}
|
||||
for caption_entry in self._captions_xml.findall(
|
||||
self._CAPTIONS_ENTRY_TAG[caption_type]):
|
||||
caption_lang_code = caption_entry.attrib.get('lang_code')
|
||||
caption_name = caption_entry.attrib.get('name') or origin_lang_name
|
||||
if not caption_lang_code or not caption_name:
|
||||
self.report_warning(f'Missing necessary caption metadata. '
|
||||
f'Need lang_code and name attributes. '
|
||||
f'Found: {caption_entry.attrib}')
|
||||
continue
|
||||
caption_format_data = []
|
||||
for caption_format in self._caption_formats_ext:
|
||||
query = {
|
||||
'vid': subtitles_id,
|
||||
'v': video_id,
|
||||
'fmt': caption_format,
|
||||
'lang': (caption_lang_code if origin_lang_code is None
|
||||
else origin_lang_code),
|
||||
'type': 'track',
|
||||
'name': caption_name,
|
||||
'kind': '',
|
||||
}
|
||||
if origin_lang_code is not None:
|
||||
query.update({'tlang': caption_lang_code})
|
||||
caption_format_data.append({
|
||||
'url': update_url_query(self._BASE_URL_CAPTIONS, query),
|
||||
'ext': caption_format,
|
||||
})
|
||||
captions[caption_lang_code] = caption_format_data
|
||||
return captions
|
||||
|
||||
def _get_subtitles(self, video_id, subtitles_id, hl):
|
||||
if not subtitles_id or not hl:
|
||||
return
|
||||
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||
if not self._captions_xml:
|
||||
return
|
||||
return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
|
||||
|
||||
def _get_automatic_captions(self, video_id, subtitles_id, hl):
|
||||
if not subtitles_id or not hl:
|
||||
return
|
||||
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||
if not self._captions_xml:
|
||||
return
|
||||
track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
|
||||
if track is None:
|
||||
return
|
||||
origin_lang_code = track.attrib.get('lang_code')
|
||||
origin_lang_name = track.attrib.get('name')
|
||||
if not origin_lang_code or not origin_lang_name:
|
||||
return
|
||||
return self._get_captions_by_type(
|
||||
video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
|
||||
'tlangs': 1,
|
||||
'v': video_id,
|
||||
'vssids': 1,
|
||||
})
|
||||
subtitle_formats = traverse_obj(subtitle_data, (lambda _, v: v.tag == 'format', {lambda x: x.get('fmt_code')}, {str}))
|
||||
for track in traverse_obj(subtitle_data, (lambda _, v: v.tag == 'track' and v.get('lang_code'))):
|
||||
language = track.get('lang_code')
|
||||
subtitles.setdefault(language, []).extend([{
|
||||
'url': self._construct_subtitle_url(
|
||||
timed_text_base_url, video_id, language, sub_fmt, track.get('kind')),
|
||||
'name': track.get('lang_original'),
|
||||
'ext': sub_fmt,
|
||||
} for sub_fmt in subtitle_formats])
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = urllib.parse.parse_qs(self._download_webpage(
|
||||
'https://drive.google.com/get_video_info',
|
||||
video_id, 'Downloading video webpage', query={'docid': video_id}))
|
||||
|
||||
def get_value(key):
|
||||
return try_get(video_info, lambda x: x[key][0])
|
||||
|
||||
reason = get_value('reason')
|
||||
title = get_value('title')
|
||||
video_info = self._download_json(
|
||||
f'https://content-workspacevideo-pa.googleapis.com/v1/drive/media/{video_id}/playback',
|
||||
video_id, 'Downloading video webpage', query={'key': 'AIzaSyDVQw45DwoYh632gvsP5vPDqEKvb-Ywnb8'},
|
||||
headers={'Referer': 'https://drive.google.com/'})
|
||||
|
||||
formats = []
|
||||
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
for fmt in traverse_obj(video_info, (
|
||||
'mediaStreamingData', 'formatStreamingData', ('adaptiveTranscodes', 'progressiveTranscodes'),
|
||||
lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
**traverse_obj(fmt, {
|
||||
'url': 'url',
|
||||
'format_id': ('itag', {int}, {str_or_none}),
|
||||
}),
|
||||
**traverse_obj(fmt, ('transcodeMetadata', {
|
||||
'ext': ('mimeType', {mimetype2ext}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'fps': ('videoFps', {int_or_none}),
|
||||
'filesize': ('contentLength', {int_or_none}),
|
||||
'vcodec': ((('videoCodecString', {str}), {value('none')}), any),
|
||||
'acodec': ((('audioCodecString', {str}), {value('none')}), any),
|
||||
})),
|
||||
'downloader_options': {
|
||||
'http_chunk_size': 10 << 20,
|
||||
},
|
||||
})
|
||||
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
ext = self._FORMATS_EXT.get(format_id)
|
||||
if not ext:
|
||||
self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': ext,
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[1],
|
||||
})
|
||||
formats.append(f)
|
||||
title = traverse_obj(video_info, ('mediaMetadata', 'title', {str}))
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.usercontent.google.com/download', {
|
||||
@@ -264,30 +217,20 @@ def add_source_format(urlh):
|
||||
or get_element_by_class('uc-error-caption', confirmation_webpage)
|
||||
or 'unable to extract confirmation code')
|
||||
|
||||
if not formats and reason:
|
||||
if title:
|
||||
self.raise_no_formats(reason, expected=True)
|
||||
else:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
hl = get_value('hl')
|
||||
subtitles_id = None
|
||||
ttsurl = get_value('ttsurl')
|
||||
if ttsurl:
|
||||
# the subtitles ID is the vid param of the ttsurl query
|
||||
subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
|
||||
|
||||
self.cookiejar.clear(domain='.google.com', path='/', name='NID')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||
'duration': int_or_none(get_value('length_seconds')),
|
||||
**traverse_obj(video_info, {
|
||||
'duration': ('mediaMetadata', 'duration', {parse_duration}),
|
||||
'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['url']), {
|
||||
'url': 'url',
|
||||
'ext': ('mimeType', {mimetype2ext}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||
'automatic_captions': self.extract_automatic_captions(
|
||||
video_id, subtitles_id, hl),
|
||||
'subtitles': self.extract_subtitles(video_id, video_info),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -13,12 +13,14 @@
|
||||
|
||||
|
||||
class GoPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
|
||||
IE_NAME = 'play.tv'
|
||||
IE_DESC = 'PLAY (formerly goplay.be)'
|
||||
_VALID_URL = r'https?://(www\.)?play\.tv/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
|
||||
|
||||
_NETRC_MACHINE = 'goplay'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
|
||||
'url': 'https://www.play.tv/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||
'ext': 'mp4',
|
||||
@@ -33,7 +35,7 @@ class GoPlayIE(InfoExtractor):
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/1917',
|
||||
'url': 'https://www.play.tv/video/1917',
|
||||
'info_dict': {
|
||||
'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
||||
'ext': 'mp4',
|
||||
@@ -43,7 +45,7 @@ class GoPlayIE(InfoExtractor):
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'url': 'https://www.play.tv/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||
'ext': 'mp4',
|
||||
@@ -101,7 +103,7 @@ def _real_extract(self, url):
|
||||
break
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
f'https://api.play.tv/web/v1/videos/long-form/{video_id}',
|
||||
video_id, headers={
|
||||
'Authorization': f'Bearer {self._id_token}',
|
||||
**self.geo_verification_headers(),
|
||||
|
||||
@@ -98,7 +98,7 @@ def _real_extract(self, url):
|
||||
|
||||
formats = []
|
||||
for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
|
||||
stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
|
||||
stream_url = re.sub(r'/playlist_pd\d+\.m3u8', '/playlist.m3u8', stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))
|
||||
|
||||
metadata = self._download_json(
|
||||
|
||||
@@ -17,57 +17,60 @@ class KikaIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'md5': 'fbfc8da483719ef06f396e5e5b938c69',
|
||||
# Video without season/episode info
|
||||
'url': 'https://www.kika.de/logo/videos/logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
|
||||
'md5': '4a9f6e0f9c6bfcc82394c294f186d6db',
|
||||
'info_dict': {
|
||||
'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'id': 'logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240831',
|
||||
'timestamp': 1725126600,
|
||||
'season_number': 2024,
|
||||
'modified_date': '20240831',
|
||||
'episode': 'Episode 476',
|
||||
'episode_number': 476,
|
||||
'season': 'Season 2024',
|
||||
'duration': 634,
|
||||
'title': 'logo! vom Samstag, 31. August 2024',
|
||||
'modified_timestamp': 1725129983,
|
||||
'title': 'logo! vom Dienstag, 28. Oktober 2025',
|
||||
'description': 'md5:4d28b92cef423bec99740ffaa3e7ec04',
|
||||
'duration': 651,
|
||||
'timestamp': 1761678000,
|
||||
'upload_date': '20251028',
|
||||
'modified_timestamp': 1761682624,
|
||||
'modified_date': '20251028',
|
||||
},
|
||||
}, {
|
||||
# Video with season/episode info
|
||||
# Also: Video with subtitles
|
||||
'url': 'https://www.kika.de/kaltstart/videos/video92498',
|
||||
'md5': '710ece827e5055094afeb474beacb7aa',
|
||||
'md5': 'e58073070acb195906c55c4ad31dceb3',
|
||||
'info_dict': {
|
||||
'id': 'video92498',
|
||||
'ext': 'mp4',
|
||||
'title': '7. Wo ist Leo?',
|
||||
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
|
||||
'duration': 436,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 7',
|
||||
'episode_number': 7,
|
||||
'timestamp': 1702926876,
|
||||
'upload_date': '20231218',
|
||||
'episode_number': 7,
|
||||
'modified_date': '20240319',
|
||||
'modified_timestamp': 1710880610,
|
||||
'episode': 'Episode 7',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'modified_date': '20240319',
|
||||
'subtitles': 'count:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
|
||||
'md5': 'ffd1b700d7de0a6616a1d08544c77294',
|
||||
# Video without subtitles
|
||||
'url': 'https://www.kika.de/die-pfefferkoerner/videos/abgezogen-102',
|
||||
'md5': '62e97961ce5343c19f0f330a1b6dd736',
|
||||
'info_dict': {
|
||||
'id': 'video90088',
|
||||
'id': 'abgezogen-102',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20221102',
|
||||
'timestamp': 1667390580,
|
||||
'duration': 197,
|
||||
'modified_timestamp': 1711093771,
|
||||
'episode_number': 8,
|
||||
'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
|
||||
'modified_date': '20240322',
|
||||
'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 8',
|
||||
'title': '1. Abgezogen',
|
||||
'description': 'md5:42d87963364391f9f8eba8affcb30bd2',
|
||||
'duration': 1574,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1735382700,
|
||||
'upload_date': '20241228',
|
||||
'modified_timestamp': 1757344051,
|
||||
'modified_date': '20250908',
|
||||
'subtitles': 'count:0',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -78,16 +81,19 @@ def _real_extract(self, url):
|
||||
video_assets = self._download_json(doc['assets']['url'], video_id)
|
||||
|
||||
subtitles = {}
|
||||
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
|
||||
subtitles['de'] = [{
|
||||
'url': ttml_resource,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': webvtt_resource,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
# Subtitle API endpoints may be present in the JSON even if there are no subtitles.
|
||||
# They then return HTTP 200 with invalid data. So we must check explicitly.
|
||||
if doc.get('hasSubtitle'):
|
||||
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
|
||||
subtitles['de'] = [{
|
||||
'url': ttml_resource,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': webvtt_resource,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -8,12 +8,10 @@
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
get_first,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
@@ -51,7 +49,7 @@ class LoomIE(InfoExtractor):
|
||||
}, {
|
||||
# m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, vtt sub and json subs
|
||||
'url': 'https://www.loom.com/share/9458bcbf79784162aa62ffb8dd66201b',
|
||||
'md5': '51737ec002969dd28344db4d60b9cbbb',
|
||||
'md5': '7b6bfdef8181c4ffc376e18919a4dcc2',
|
||||
'info_dict': {
|
||||
'id': '9458bcbf79784162aa62ffb8dd66201b',
|
||||
'ext': 'mp4',
|
||||
@@ -71,12 +69,13 @@ class LoomIE(InfoExtractor):
|
||||
'ext': 'webm',
|
||||
'title': 'OMFG clown',
|
||||
'description': 'md5:285c5ee9d62aa087b7e3271b08796815',
|
||||
'uploader': 'MrPumkin B',
|
||||
'uploader': 'Brailey Bragg',
|
||||
'upload_date': '20210924',
|
||||
'timestamp': 1632519618,
|
||||
'duration': 210,
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
'expected_warnings': ['Failed to parse JSON'], # transcoded-url no longer available
|
||||
}, {
|
||||
# password-protected
|
||||
'url': 'https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4',
|
||||
@@ -91,10 +90,11 @@ class LoomIE(InfoExtractor):
|
||||
'duration': 35,
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
'expected_warnings': ['Failed to parse JSON'], # transcoded-url no longer available
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'md5': 'f983a0f02f24331738b2f43aecb05256',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -119,11 +119,12 @@ class LoomIE(InfoExtractor):
|
||||
'duration': 247,
|
||||
'timestamp': 1676274030,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_GRAPHQL_VARIABLES = {
|
||||
'GetVideoSource': {
|
||||
'acceptableMimes': ['DASH', 'M3U8', 'MP4'],
|
||||
'acceptableMimes': ['DASH', 'M3U8', 'MP4', 'WEBM'],
|
||||
},
|
||||
}
|
||||
_GRAPHQL_QUERIES = {
|
||||
@@ -192,6 +193,12 @@ class LoomIE(InfoExtractor):
|
||||
id
|
||||
nullableRawCdnUrl(acceptableMimes: $acceptableMimes, password: $password) {
|
||||
url
|
||||
credentials {
|
||||
Policy
|
||||
Signature
|
||||
KeyPairId
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
@@ -240,9 +247,9 @@ class LoomIE(InfoExtractor):
|
||||
}
|
||||
}\n'''),
|
||||
}
|
||||
_APOLLO_GRAPHQL_VERSION = '0a1856c'
|
||||
_APOLLO_GRAPHQL_VERSION = '45a5bd4'
|
||||
|
||||
def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
|
||||
def _call_graphql_api(self, operation_name, video_id, note=None, errnote=None, fatal=True):
|
||||
password = self.get_param('videopassword')
|
||||
return self._download_json(
|
||||
'https://www.loom.com/graphql', video_id, note or 'Downloading GraphQL JSON',
|
||||
@@ -252,7 +259,9 @@ def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
|
||||
'x-loom-request-source': f'loom_web_{self._APOLLO_GRAPHQL_VERSION}',
|
||||
'apollographql-client-name': 'web',
|
||||
'apollographql-client-version': self._APOLLO_GRAPHQL_VERSION,
|
||||
}, data=json.dumps([{
|
||||
'graphql-operation-name': operation_name,
|
||||
'Origin': 'https://www.loom.com',
|
||||
}, data=json.dumps({
|
||||
'operationName': operation_name,
|
||||
'variables': {
|
||||
'videoId': video_id,
|
||||
@@ -260,7 +269,7 @@ def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
|
||||
**self._GRAPHQL_VARIABLES.get(operation_name, {}),
|
||||
},
|
||||
'query': self._GRAPHQL_QUERIES[operation_name],
|
||||
} for operation_name in variadic(operations)], separators=(',', ':')).encode())
|
||||
}, separators=(',', ':')).encode(), fatal=fatal)
|
||||
|
||||
def _call_url_api(self, endpoint, video_id):
|
||||
response = self._download_json(
|
||||
@@ -275,7 +284,7 @@ def _call_url_api(self, endpoint, video_id):
|
||||
}, separators=(',', ':')).encode())
|
||||
return traverse_obj(response, ('url', {url_or_none}))
|
||||
|
||||
def _extract_formats(self, video_id, metadata, gql_data):
|
||||
def _extract_formats(self, video_id, metadata, video_data):
|
||||
formats = []
|
||||
video_properties = traverse_obj(metadata, ('video_properties', {
|
||||
'width': ('width', {int_or_none}),
|
||||
@@ -330,7 +339,7 @@ def get_formats(format_url, format_id, quality):
|
||||
transcoded_url = self._call_url_api('transcoded-url', video_id)
|
||||
formats.extend(get_formats(transcoded_url, 'transcoded', quality=-1)) # transcoded quality
|
||||
|
||||
cdn_url = get_first(gql_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
|
||||
cdn_url = traverse_obj(video_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
|
||||
# cdn_url is usually a dupe, but the raw-url/transcoded-url endpoints could return errors
|
||||
valid_urls = [update_url(url, query=None) for url in (raw_url, transcoded_url) if url]
|
||||
if cdn_url and update_url(cdn_url, query=None) not in valid_urls:
|
||||
@@ -338,10 +347,21 @@ def get_formats(format_url, format_id, quality):
|
||||
|
||||
return formats
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
subs_data = self._call_graphql_api(
|
||||
'FetchVideoTranscript', video_id, 'Downloading GraphQL subtitles JSON', fatal=False)
|
||||
return filter_dict({
|
||||
'en': traverse_obj(subs_data, (
|
||||
'data', 'fetchVideoTranscript',
|
||||
('source_url', 'captions_source_url'), {
|
||||
'url': {url_or_none},
|
||||
})) or None,
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
metadata = get_first(
|
||||
self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON'),
|
||||
metadata = traverse_obj(
|
||||
self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON', fatal=False),
|
||||
('data', 'getVideo', {dict})) or {}
|
||||
|
||||
if metadata.get('__typename') == 'VideoPasswordMissingOrIncorrect':
|
||||
@@ -350,22 +370,19 @@ def _real_extract(self, url):
|
||||
'This video is password-protected, use the --video-password option', expected=True)
|
||||
raise ExtractorError('Invalid video password', expected=True)
|
||||
|
||||
gql_data = self._call_graphql_api(['FetchChapters', 'FetchVideoTranscript', 'GetVideoSource'], video_id)
|
||||
video_data = self._call_graphql_api(
|
||||
'GetVideoSource', video_id, 'Downloading GraphQL video JSON')
|
||||
chapter_data = self._call_graphql_api(
|
||||
'FetchChapters', video_id, 'Downloading GraphQL chapters JSON', fatal=False)
|
||||
duration = traverse_obj(metadata, ('video_properties', 'duration', {int_or_none}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'duration': duration,
|
||||
'chapters': self._extract_chapters_from_description(
|
||||
get_first(gql_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
|
||||
'formats': self._extract_formats(video_id, metadata, gql_data),
|
||||
'subtitles': filter_dict({
|
||||
'en': traverse_obj(gql_data, (
|
||||
..., 'data', 'fetchVideoTranscript',
|
||||
('source_url', 'captions_source_url'), {
|
||||
'url': {url_or_none},
|
||||
})) or None,
|
||||
}),
|
||||
traverse_obj(chapter_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
|
||||
'formats': self._extract_formats(video_id, metadata, video_data),
|
||||
'subtitles': self.extract_subtitles(video_id),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
@@ -376,6 +393,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class LoomFolderIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'loom:folder'
|
||||
_VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[\da-f]{32})'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import re
|
||||
import functools
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@@ -10,15 +12,64 @@
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
|
||||
class MaveBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://api.mave.digital/v1/website'
|
||||
_API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'
|
||||
|
||||
def _load_channel_meta(self, channel_id, display_id):
|
||||
return traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/', display_id,
|
||||
note='Downloading channel metadata'), 'podcast')
|
||||
|
||||
def _load_episode_meta(self, channel_id, episode_code, display_id):
|
||||
return self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}',
|
||||
display_id, note='Downloading episode metadata')
|
||||
|
||||
def _create_entry(self, channel_id, channel_meta, episode_meta):
|
||||
episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))
|
||||
return {
|
||||
'display_id': f'{channel_id}-{episode_code}',
|
||||
'extractor_key': MaveIE.ie_key(),
|
||||
'extractor': MaveIE.IE_NAME,
|
||||
'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
|
||||
'channel_id': channel_id,
|
||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(episode_meta, {
|
||||
'id': ('id', {str}),
|
||||
'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'view_count': ('listenings', {int_or_none}),
|
||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||
'timestamp': ('publish_date', {parse_iso8601}),
|
||||
}),
|
||||
**traverse_obj(channel_meta, {
|
||||
'series_id': ('id', {str}),
|
||||
'series': ('title', {str}),
|
||||
'channel': ('title', {str}),
|
||||
'uploader': ('author', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class MaveIE(MaveBaseIE):
|
||||
IE_NAME = 'mave'
|
||||
_VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ochenlichnoe.mave.digital/ep-25',
|
||||
'md5': 'aa3e513ef588b4366df1520657cbc10c',
|
||||
'info_dict': {
|
||||
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'ochenlichnoe-ep-25',
|
||||
'display_id': 'ochenlichnoe-25',
|
||||
'title': 'Между мной и миром: психология самооценки',
|
||||
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
|
||||
'uploader': 'Самарский университет',
|
||||
@@ -45,7 +96,7 @@ class MaveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'budem-ep-12',
|
||||
'display_id': 'budem-12',
|
||||
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
|
||||
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
|
||||
'uploader': 'Полина Цветкова+Евгения Акопова',
|
||||
@@ -68,40 +119,72 @@ class MaveIE(InfoExtractor):
|
||||
'upload_date': '20241230',
|
||||
},
|
||||
}]
|
||||
_API_BASE_URL = 'https://api.mave.digital/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
|
||||
display_id = f'{channel_id}-{slug}'
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = traverse_obj(
|
||||
self._search_nuxt_json(webpage, display_id),
|
||||
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
|
||||
channel_id, episode_code = self._match_valid_url(url).group(
|
||||
'channel_id', 'episode_code')
|
||||
display_id = f'{channel_id}-{episode_code}'
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, display_id)
|
||||
episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)
|
||||
|
||||
return self._create_entry(channel_id, channel_meta, episode_meta)
|
||||
|
||||
|
||||
class MaveChannelIE(MaveBaseIE):
|
||||
IE_NAME = 'mave:channel'
|
||||
_VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://budem.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'budem',
|
||||
'title': 'Все там будем',
|
||||
'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://ochenlichnoe.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'ochenlichnoe',
|
||||
'title': 'Очень личное',
|
||||
'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'https://geekcity.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'geekcity',
|
||||
'title': 'Мужчины в трико',
|
||||
'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _entries(self, channel_id, channel_meta, page_num):
|
||||
page_data = self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={
|
||||
'view': 'all',
|
||||
'page': page_num + 1,
|
||||
'sort': 'newest',
|
||||
'format': 'all',
|
||||
}, note=f'Downloading page {page_num + 1}')
|
||||
for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])):
|
||||
yield self._create_entry(channel_id, channel_meta, ep)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, channel_id)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'channel_id': channel_id,
|
||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
|
||||
**traverse_obj(data, ('activeEpisodeData', {
|
||||
'url': ('audio', {urljoin(self._API_BASE_URL)}),
|
||||
'id': ('id', {str}),
|
||||
'_type': 'playlist',
|
||||
'id': channel_id,
|
||||
**traverse_obj(channel_meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'view_count': ('listenings', {int_or_none}),
|
||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||
'timestamp': ('publish_date', {parse_iso8601}),
|
||||
})),
|
||||
**traverse_obj(data, ('podcast', 'podcast', {
|
||||
'series_id': ('id', {str}),
|
||||
'series': ('title', {str}),
|
||||
'channel': ('title', {str}),
|
||||
'uploader': ('author', {str}),
|
||||
})),
|
||||
'description': ('description', {str}),
|
||||
}),
|
||||
'entries': InAdvancePagedList(
|
||||
functools.partial(self._entries, channel_id, channel_meta),
|
||||
math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
|
||||
}
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
@@ -30,25 +25,8 @@ class MedalTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 13,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': '10619174',
|
||||
'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
|
||||
'uploader_url': 'https://medal.tv/users/10619174',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 23,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/ugcp/content-thumbnail/.*\.jpg',
|
||||
'tags': ['headshot', 'valorant', '4k', 'clutch', 'mornu'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
@@ -57,12 +35,12 @@ class MedalTVIE(InfoExtractor):
|
||||
'id': '2um24TWdty0NA',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'description': '',
|
||||
'uploader': 'zahl',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': '5156321',
|
||||
'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.png',
|
||||
'uploader_url': 'https://medal.tv/users/5156321',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
@@ -70,91 +48,77 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 9,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# API requires auth
|
||||
'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
|
||||
'md5': '6c6bb6569777fd8b4ef7b33c09de8dcf',
|
||||
'info_dict': {
|
||||
'id': '2WRj40tpY_EU9',
|
||||
'ext': 'mp4',
|
||||
'title': '1v5 clutch',
|
||||
'description': '',
|
||||
'uploader': 'adny',
|
||||
'uploader_id': '6256941',
|
||||
'uploader_url': 'https://medal.tv/users/6256941',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 25,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.jpg',
|
||||
'timestamp': 1612896680,
|
||||
'upload_date': '20210209',
|
||||
},
|
||||
'expected_warnings': ['Video formats are not available through API'],
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||
|
||||
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
|
||||
title = clip['contentTitle']
|
||||
|
||||
source_width = int_or_none(clip.get('sourceWidth'))
|
||||
source_height = int_or_none(clip.get('sourceHeight'))
|
||||
|
||||
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||
|
||||
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||
item_id = item_id or '%dp' % height
|
||||
if item_id not in item_url:
|
||||
return
|
||||
container.append({
|
||||
'url': item_url,
|
||||
id_key: item_id,
|
||||
'width': round(aspect_ratio * height),
|
||||
'height': height,
|
||||
})
|
||||
content_data = self._download_json(
|
||||
f'https://medal.tv/api/content/{video_id}', video_id,
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for k, v in clip.items():
|
||||
if not (v and isinstance(v, str)):
|
||||
continue
|
||||
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||
if not mobj:
|
||||
continue
|
||||
prefix = mobj.group(1)
|
||||
height = int_or_none(mobj.group(2))
|
||||
if prefix == 'contentUrl':
|
||||
add_item(
|
||||
formats, v, height or source_height,
|
||||
item_id=None if height else 'source')
|
||||
elif prefix == 'thumbnail':
|
||||
add_item(thumbnails, v, height, 'id')
|
||||
|
||||
error = clip.get('error')
|
||||
if not formats and error:
|
||||
if error == 404:
|
||||
self.raise_no_formats(
|
||||
'That clip does not exist.',
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'An unknown error occurred ({error}).',
|
||||
video_id=video_id)
|
||||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
|
||||
author_id = str_or_none(author.get('userId'))
|
||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||
if m3u8_url := url_or_none(content_data.get('contentUrlHls')):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
if http_url := url_or_none(content_data.get('contentUrl')):
|
||||
formats.append({
|
||||
'url': http_url,
|
||||
'format_id': 'http-source',
|
||||
'ext': 'mp4',
|
||||
'quality': 1,
|
||||
})
|
||||
formats = [fmt for fmt in formats if 'video/privacy-protected-guest' not in fmt['url']]
|
||||
if not formats:
|
||||
# Fallback, does not require auth
|
||||
self.report_warning('Video formats are not available through API, falling back to social video URL')
|
||||
urlh = self._request_webpage(
|
||||
f'https://medal.tv/api/content/{video_id}/socialVideoUrl', video_id,
|
||||
note='Checking social video URL')
|
||||
formats.append({
|
||||
'url': urlh.url,
|
||||
'format_id': 'social-video',
|
||||
'ext': 'mp4',
|
||||
'quality': -1,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clip.get('contentDescription'),
|
||||
'uploader': author.get('displayName'),
|
||||
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||
'uploader_id': author_id,
|
||||
'uploader_url': author_url,
|
||||
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||
'view_count': int_or_none(clip.get('views')),
|
||||
'like_count': int_or_none(clip.get('likes')),
|
||||
'comment_count': int_or_none(clip.get('comments')),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('contentTitle', {str}),
|
||||
'description': ('contentDescription', {str}),
|
||||
'timestamp': ('created', {int_or_none(scale=1000)}),
|
||||
'duration': ('videoLengthSeconds', {int_or_none}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'uploader': ('poster', 'displayName', {str}),
|
||||
'uploader_id': ('poster', 'userId', {str}),
|
||||
'uploader_url': ('poster', 'userId', {str}, filter, {lambda x: x and f'https://medal.tv/users/{x}'}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
92
yt_dlp/extractor/mux.py
Normal file
92
yt_dlp/extractor/mux.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MuxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:stream\.new/v|player\.mux\.com)/(?P<id>[A-Za-z0-9-]+)'
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:stream\.new/v|player\.mux\.com)/(?P<id>[A-Za-z0-9-]+)[^"\']+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://stream.new/v/OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j/embed',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j',
|
||||
'title': 'OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://player.mux.com/OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j',
|
||||
'title': 'OCtRWZiZqKvLbnZ32WSEYiGNvHdAmB01j',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# iframe embed
|
||||
'url': 'https://www.redbrickai.com/blog/2025-07-14-FAST-brush',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'cXhzAiW1AmsHY01eRbEYFcTEAn0102aGN8sbt8JprP6Dfw',
|
||||
'title': 'cXhzAiW1AmsHY01eRbEYFcTEAn0102aGN8sbt8JprP6Dfw',
|
||||
},
|
||||
}, {
|
||||
# mux-player embed
|
||||
'url': 'https://muxvideo.2coders.com/download/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'JBuasdg35Hw7tYmTe9k68QLPQKixL300YsWHDz5Flit8',
|
||||
'title': 'JBuasdg35Hw7tYmTe9k68QLPQKixL300YsWHDz5Flit8',
|
||||
},
|
||||
}, {
|
||||
# mux-player with title metadata
|
||||
'url': 'https://datastar-todomvc.cross.stream/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'KX01ZSZ8CXv5SVfVwMZKJTcuBcUQmo1ReS9U5JjoHm4k',
|
||||
'title': 'TodoMVC with Datastar Tutorial',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for mux_player in re.findall(r'<mux-(?:player|video)\b[^>]*\bplayback-id=[^>]+>', webpage):
|
||||
attrs = extract_attributes(mux_player)
|
||||
playback_id = attrs.get('playback-id')
|
||||
if not playback_id:
|
||||
continue
|
||||
token = attrs.get('playback-token') or traverse_obj(playback_id, ({parse_qs}, 'token', -1))
|
||||
playback_id = playback_id.partition('?')[0]
|
||||
|
||||
embed_url = update_url_query(
|
||||
f'https://player.mux.com/{playback_id}',
|
||||
filter_dict({'playback-token': token}))
|
||||
if title := attrs.get('metadata-video-title'):
|
||||
embed_url = smuggle_url(embed_url, {'title': title})
|
||||
yield embed_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
|
||||
token = traverse_obj(parse_qs(url), ('playback-token', -1))
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://stream.mux.com/{video_id}.m3u8', video_id, 'mp4',
|
||||
query=filter_dict({'token': token}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': smuggled_data.get('title') or video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
60
yt_dlp/extractor/nascar.py
Normal file
60
yt_dlp/extractor/nascar.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NascarClassicsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?classics\.nascar\.com/video/(?P<id>[\w~-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://classics.nascar.com/video/Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
|
||||
'md5': '81d712eccffa7169c328281b8cc28f77',
|
||||
'info_dict': {
|
||||
'id': 'Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cook Out 400 2023',
|
||||
'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2023_22_RICHMOND_THUMB_NCD.jpg',
|
||||
'timestamp': 1690732800,
|
||||
'upload_date': '20230730',
|
||||
'tags': ['2023', 'race #22', 'richmond', 'chris buescher', 'cup'],
|
||||
'chapters': 'count:18',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://classics.nascar.com/video/UASvPDOwEha~SIvJii7uAC~wszPshklHN',
|
||||
'md5': 'a5e8d6ec6005da3857d25ba2df5e7133',
|
||||
'info_dict': {
|
||||
'id': 'UASvPDOwEha~SIvJii7uAC~wszPshklHN',
|
||||
'ext': 'mp4',
|
||||
'title': 'I Love New York 355 at the Glen 2017',
|
||||
'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2017_22_WATKINSGLEN_THUMB_NCD.jpg',
|
||||
'timestamp': 1501995600,
|
||||
'upload_date': '20170806',
|
||||
'tags': ['watkins glen', 'race #22', '2017', 'martin truex jr.', 'cup'],
|
||||
'chapters': 'count:13',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
content_data = self._search_nextjs_data(
|
||||
webpage, video_id)['props']['pageProps']['contentData']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(content_data['input']['src'], video_id, 'mp4'),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('input', 'name', {str}),
|
||||
'description': ('input', 'description', {str}, filter),
|
||||
'thumbnail': ('input', 'thumbnail', {url_or_none}),
|
||||
'tags': ('input', 'settings', 'tags', ..., {str}),
|
||||
'timestamp': ('input', 'start_time', {parse_iso8601}),
|
||||
'chapters': ('overlay', 'data', 'timelines', 0, 'events', lambda _, v: float(v['timestamp']) is not None, {
|
||||
'start_time': ('timestamp', {float_or_none}),
|
||||
'title': ('name', {str}),
|
||||
}),
|
||||
}),
|
||||
}
|
||||
@@ -63,7 +63,7 @@ def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
|
||||
# formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
|
||||
# formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
|
||||
query['formats'] = 'm3u+none,mpeg4'
|
||||
m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
|
||||
orig_m3u8_url = m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
|
||||
|
||||
if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
|
||||
query['formats'] = 'mpeg4'
|
||||
@@ -76,7 +76,17 @@ def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
|
||||
if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
|
||||
return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
if not formats and m3u8_url != orig_m3u8_url:
|
||||
orig_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
orig_m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats = [f for f in orig_fmts if not f.get('has_drm')]
|
||||
if orig_fmts and not formats:
|
||||
self.report_drm(video_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
79
yt_dlp/extractor/netapp.py
Normal file
79
yt_dlp/extractor/netapp.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NetAppBaseIE(InfoExtractor):
|
||||
_BC_URL = 'https://players.brightcove.net/6255154784001/default_default/index.html?videoId={}'
|
||||
|
||||
@staticmethod
|
||||
def _parse_metadata(item):
|
||||
return traverse_obj(item, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
})
|
||||
|
||||
|
||||
class NetAppVideoIE(NetAppBaseIE):
|
||||
_VALID_URL = r'https?://media\.netapp\.com/video-detail/(?P<id>[0-9a-f-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://media.netapp.com/video-detail/da25fc01-82ad-5284-95bc-26920200a222/seamless-storage-for-modern-kubernetes-deployments',
|
||||
'info_dict': {
|
||||
'id': '1843620950167202073',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seamless storage for modern Kubernetes deployments',
|
||||
'description': 'md5:1ee39e315243fe71fb90af2796037248',
|
||||
'uploader_id': '6255154784001',
|
||||
'duration': 2159.41,
|
||||
'thumbnail': r're:https://house-fastly-signed-us-east-1-prod\.brightcovecdn\.com/image/.*\.jpg',
|
||||
'tags': 'count:15',
|
||||
'timestamp': 1758213949,
|
||||
'upload_date': '20250918',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://media.netapp.com/video-detail/45593e5d-cf1c-5996-978c-c9081906e69f/unleash-ai-innovation-with-your-data-with-the-netapp-platform',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_uuid = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
f'https://api.media.netapp.com/client/detail/{video_uuid}', video_uuid)
|
||||
|
||||
brightcove_video_id = traverse_obj(metadata, (
|
||||
'sections', lambda _, v: v['type'] == 'Player', 'video', {str}, any, {require('brightcove video id')}))
|
||||
|
||||
video_item = traverse_obj(metadata, ('sections', lambda _, v: v['type'] == 'VideoDetail', any))
|
||||
|
||||
return self.url_result(
|
||||
self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
|
||||
url_transparent=True, **self._parse_metadata(video_item))
|
||||
|
||||
|
||||
class NetAppCollectionIE(NetAppBaseIE):
|
||||
_VALID_URL = r'https?://media\.netapp\.com/collection/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://media.netapp.com/collection/9820e190-f2a6-47ac-9c0a-98e5e64234a4',
|
||||
'info_dict': {
|
||||
'title': 'Featured sessions',
|
||||
'id': '9820e190-f2a6-47ac-9c0a-98e5e64234a4',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
def _entries(self, metadata):
|
||||
for item in traverse_obj(metadata, ('items', lambda _, v: v['brightcoveVideoId'])):
|
||||
brightcove_video_id = item['brightcoveVideoId']
|
||||
yield self.url_result(
|
||||
self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
|
||||
url_transparent=True, **self._parse_metadata(item))
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_uuid = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
f'https://api.media.netapp.com/client/collection/{collection_uuid}', collection_uuid)
|
||||
|
||||
return self.playlist_result(self._entries(metadata), collection_uuid, playlist_title=metadata.get('name'))
|
||||
@@ -23,96 +23,38 @@
|
||||
|
||||
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
|
||||
_API_URL_TEMPLATE = 'https://api.nhkworld.jp/showsapi/v1/{lang}/{content_format}_{page_type}/{m_id}{extra_page}'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
|
||||
|
||||
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
|
||||
content_format = 'video' if is_video else 'audio'
|
||||
content_type = 'clips' if is_clip else 'episodes'
|
||||
if not is_episode:
|
||||
extra_page = f'/{content_format}_{content_type}'
|
||||
page_type = 'programs'
|
||||
else:
|
||||
extra_page = ''
|
||||
page_type = content_type
|
||||
|
||||
return self._download_json(
|
||||
self._API_URL_TEMPLATE % (
|
||||
'v' if is_video else 'r',
|
||||
'clip' if is_clip else 'esd',
|
||||
'episode' if is_episode else 'program',
|
||||
m_id, lang, '/all' if is_video else ''),
|
||||
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
|
||||
|
||||
def _get_api_info(self, refresh=True):
|
||||
if not refresh:
|
||||
return self.cache.load('nhk', 'api_info')
|
||||
|
||||
self.cache.store('nhk', 'api_info', {})
|
||||
movie_player_js = self._download_webpage(
|
||||
'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
|
||||
note='Downloading stream API information')
|
||||
api_info = {
|
||||
'url': self._search_regex(
|
||||
r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
|
||||
'token': self._search_regex(
|
||||
r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
|
||||
}
|
||||
self.cache.store('nhk', 'api_info', api_info)
|
||||
return api_info
|
||||
|
||||
def _extract_stream_info(self, vod_id):
|
||||
for refresh in (False, True):
|
||||
api_info = self._get_api_info(refresh)
|
||||
if not api_info:
|
||||
continue
|
||||
|
||||
api_url = api_info.pop('url')
|
||||
meta = traverse_obj(
|
||||
self._download_json(
|
||||
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
|
||||
**api_info,
|
||||
'type': 'json',
|
||||
'optional_id': vod_id,
|
||||
'active_flg': 1,
|
||||
}), ('meta', 0))
|
||||
stream_url = traverse_obj(
|
||||
meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
|
||||
|
||||
if stream_url:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
|
||||
return {
|
||||
**traverse_obj(meta, {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('publication_date', {unified_timestamp}),
|
||||
'release_timestamp': ('insert_date', {unified_timestamp}),
|
||||
'modified_timestamp': ('update_date', {unified_timestamp}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
raise ExtractorError('Unable to extract stream url')
|
||||
self._API_URL_TEMPLATE.format(
|
||||
lang=lang, content_format=content_format, page_type=page_type,
|
||||
m_id=m_id, extra_page=extra_page),
|
||||
join_nonempty(m_id, lang))
|
||||
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
|
||||
is_video = m_type != 'audio'
|
||||
|
||||
if is_video:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
if fetch_episode:
|
||||
episode = self._call_api(
|
||||
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
|
||||
episode_id, lang, is_video, is_episode=True, is_clip=episode_id[:4] == '9999')
|
||||
|
||||
def get_clean_field(key):
|
||||
return clean_html(episode.get(key + '_clean') or episode.get(key))
|
||||
video_id = join_nonempty('id', 'lang', from_dict=episode)
|
||||
|
||||
title = get_clean_field('sub_title')
|
||||
series = get_clean_field('title')
|
||||
|
||||
thumbnails = []
|
||||
for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
|
||||
img_path = episode.get('image' + s)
|
||||
if not img_path:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': f'{h}p',
|
||||
'height': h,
|
||||
'width': w,
|
||||
'url': 'https://www3.nhk.or.jp' + img_path,
|
||||
})
|
||||
title = episode.get('title')
|
||||
series = traverse_obj(episode, (('video_program', 'audio_program'), any, 'title'))
|
||||
|
||||
episode_name = title
|
||||
if series and title:
|
||||
@@ -125,37 +67,52 @@ def get_clean_field(key):
|
||||
episode_name = None
|
||||
|
||||
info = {
|
||||
'id': episode_id + '-' + lang,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_clean_field('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'series': series,
|
||||
'episode': episode_name,
|
||||
**traverse_obj(episode, {
|
||||
'description': ('description', {str}),
|
||||
'release_timestamp': ('first_broadcasted_at', {unified_timestamp}),
|
||||
'categories': ('categories', ..., 'name', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'thumbnails': ('images', lambda _, v: v['url'], {
|
||||
'url': ('url', {urljoin(url)}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'webpage_url': ('url', {urljoin(url)}),
|
||||
}),
|
||||
'extractor_key': NhkVodIE.ie_key(),
|
||||
'extractor': NhkVodIE.IE_NAME,
|
||||
}
|
||||
|
||||
if is_video:
|
||||
vod_id = episode['vod_id']
|
||||
info.update({
|
||||
**self._extract_stream_info(vod_id),
|
||||
'id': vod_id,
|
||||
})
|
||||
|
||||
# XXX: We are assuming that 'video' and 'audio' are mutually exclusive
|
||||
stream_info = traverse_obj(episode, (('video', 'audio'), {dict}, any)) or {}
|
||||
if not stream_info.get('url'):
|
||||
self.raise_no_formats('Stream not found; it has most likely expired', expected=True)
|
||||
else:
|
||||
if fetch_episode:
|
||||
stream_url = stream_info['url']
|
||||
if is_video:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(stream_info, ({
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('published_at', {unified_timestamp}),
|
||||
})),
|
||||
})
|
||||
else:
|
||||
# From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
|
||||
audio_path = remove_end(episode['audio']['audio'], '.m4a')
|
||||
audio_path = remove_end(stream_url, '.m4a')
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': NhkVodIE.ie_key(),
|
||||
'url': url,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
@@ -168,29 +125,29 @@ class NhkVodIE(NhkBaseIE):
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2049165/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
|
||||
'id': '2049165-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
|
||||
'title': 'Japan Railway Journal - Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'description': 'md5:ab57df2fca7f04245148c2e787bb203d',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'episode': 'Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'series': 'Japan Railway Journal',
|
||||
'modified_timestamp': 1707217907,
|
||||
'timestamp': 1681428600,
|
||||
'release_timestamp': 1693883728,
|
||||
'duration': 1679,
|
||||
'upload_date': '20230413',
|
||||
'modified_date': '20240206',
|
||||
'release_date': '20230905',
|
||||
'duration': 1680,
|
||||
'categories': ['Biz & Tech'],
|
||||
'tags': ['Akita', 'Chiba', 'Trains', 'Transcript', 'All (Japan Navigator)'],
|
||||
'timestamp': 1759055880,
|
||||
'upload_date': '20250928',
|
||||
'release_timestamp': 1758810600,
|
||||
'release_date': '20250925',
|
||||
},
|
||||
}, {
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '153c3016dfd252ba09726588149cf0e7',
|
||||
'info_dict': {
|
||||
'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
|
||||
'id': '9999011-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
@@ -198,24 +155,23 @@ class NhkVodIE(NhkBaseIE):
|
||||
'series': 'Dining with the Chef',
|
||||
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'duration': 148,
|
||||
'upload_date': '20190816',
|
||||
'release_date': '20230902',
|
||||
'release_timestamp': 1693619292,
|
||||
'modified_timestamp': 1707217907,
|
||||
'modified_date': '20240206',
|
||||
'timestamp': 1565997540,
|
||||
'categories': ['Food'],
|
||||
'tags': ['Washoku'],
|
||||
'timestamp': 1548212400,
|
||||
'upload_date': '20190123',
|
||||
},
|
||||
}, {
|
||||
# radio
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20240901-1/',
|
||||
'info_dict': {
|
||||
'id': 'livinginjapan-20231001-1-en',
|
||||
'id': 'livinginjapan-20240901-1-en',
|
||||
'ext': 'm4a',
|
||||
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
'title': 'Living in Japan - Weekend Hiking / Self-protection from crime',
|
||||
'series': 'Living in Japan',
|
||||
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
|
||||
'description': 'md5:4d0e14ab73bdbfedb60a53b093954ed6',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
'episode': 'Weekend Hiking / Self-protection from crime',
|
||||
'categories': ['Interactive'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
@@ -256,96 +212,51 @@ class NhkVodIE(NhkBaseIE):
|
||||
},
|
||||
'skip': 'expires 2023-10-15',
|
||||
}, {
|
||||
# a one-off (single-episode series). title from the api is just '<p></p>'
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
|
||||
# a one-off (single-episode series). title from the api is just null
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
|
||||
'id': '3026036-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
|
||||
'description': 'md5:5db620c46a0698451cc59add8816b797',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'release_date': '20230905',
|
||||
'timestamp': 1690103400,
|
||||
'duration': 2939,
|
||||
'release_timestamp': 1693898699,
|
||||
'upload_date': '20230723',
|
||||
'modified_timestamp': 1707217907,
|
||||
'modified_date': '20240206',
|
||||
'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
|
||||
'series': 'Barakan Discovers',
|
||||
'title': 'STATELESS: The Japanese Left Behind in the Philippines',
|
||||
'description': 'md5:9a2fd51cdfa9f52baae28569e0053786',
|
||||
'duration': 2955,
|
||||
'thumbnail': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/images/wide_l_QPtWpt4lzVhm3NzPAMIIF35MCg4CdNwcikPaTS5Q.jpg',
|
||||
'categories': ['Documentary', 'Culture & Lifestyle'],
|
||||
'tags': ['Transcript', 'Documentary 360', 'The Pursuit of PEACE'],
|
||||
'timestamp': 1758931800,
|
||||
'upload_date': '20250927',
|
||||
'release_timestamp': 1758931800,
|
||||
'release_date': '20250927',
|
||||
},
|
||||
}, {
|
||||
# /ondemand/video/ url with alphabetical character in 5th position of id
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
|
||||
'info_dict': {
|
||||
'id': 'nw_c_en_9999-a07',
|
||||
'id': '9999a07-en',
|
||||
'ext': 'mp4',
|
||||
'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
|
||||
'series': 'Mini-Dramas on SDGs',
|
||||
'modified_date': '20240206',
|
||||
'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
|
||||
'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
|
||||
'timestamp': 1621962360,
|
||||
'duration': 189,
|
||||
'release_date': '20230903',
|
||||
'modified_timestamp': 1707217907,
|
||||
'timestamp': 1621911600,
|
||||
'duration': 190,
|
||||
'upload_date': '20210525',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'release_timestamp': 1693713487,
|
||||
'categories': ['Current Affairs', 'Entertainment'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
|
||||
'info_dict': {
|
||||
'id': 'nw_c_en_9999-d17',
|
||||
'id': '9999d17-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
|
||||
'description': 'Today’s focus: Snow',
|
||||
'release_timestamp': 1693792402,
|
||||
'release_date': '20230904',
|
||||
'upload_date': '20220128',
|
||||
'timestamp': 1643370960,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 136,
|
||||
'series': '',
|
||||
'modified_date': '20240206',
|
||||
'modified_timestamp': 1707217907,
|
||||
},
|
||||
}, {
|
||||
# new /shows/ url format
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japanology Plus - 20th Anniversary Special Part 1',
|
||||
'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
|
||||
'episode': '20th Anniversary Special Part 1',
|
||||
'series': 'Japanology Plus',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 1680,
|
||||
'timestamp': 1711020600,
|
||||
'upload_date': '20240321',
|
||||
'release_timestamp': 1711022683,
|
||||
'release_date': '20240321',
|
||||
'modified_timestamp': 1711031012,
|
||||
'modified_date': '20240321',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
|
||||
'ext': 'mp4',
|
||||
'title': '100 Ideas to Save the World - Working Styles Evolve',
|
||||
'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
|
||||
'episode': 'Working Styles Evolve',
|
||||
'series': '100 Ideas to Save the World',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 899,
|
||||
'upload_date': '20230325',
|
||||
'timestamp': 1679755200,
|
||||
'release_date': '20230905',
|
||||
'release_timestamp': 1693880540,
|
||||
'modified_date': '20240206',
|
||||
'modified_timestamp': 1707217907,
|
||||
'categories': ['Culture & Lifestyle', 'Science & Nature'],
|
||||
'tags': ['Nara', 'Temples & Shrines', 'Winter', 'Snow'],
|
||||
'timestamp': 1643339040,
|
||||
'upload_date': '20220128',
|
||||
},
|
||||
}, {
|
||||
# new /shows/audio/ url format
|
||||
@@ -373,6 +284,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'sumo',
|
||||
'title': 'GRAND SUMO Highlights',
|
||||
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
|
||||
'series': 'GRAND SUMO Highlights',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
@@ -381,6 +293,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
|
||||
'series': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
@@ -390,6 +303,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
|
||||
'series': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
@@ -399,17 +313,9 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'livinginjapan',
|
||||
'title': 'Living in Japan',
|
||||
'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
|
||||
'series': 'Living in Japan',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
# /tv/ program url
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
|
||||
'info_dict': {
|
||||
'id': 'designtalksplus',
|
||||
'title': 'DESIGN TALKS plus',
|
||||
'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
|
||||
'only_matching': True,
|
||||
@@ -430,9 +336,8 @@ def _real_extract(self, url):
|
||||
program_id, lang, m_type != 'audio', False, episode_type == 'clip')
|
||||
|
||||
def entries():
|
||||
for episode in episodes:
|
||||
if episode_path := episode.get('url'):
|
||||
yield self._extract_episode_info(urljoin(url, episode_path), episode)
|
||||
for episode in traverse_obj(episodes, ('items', lambda _, v: v['url'])):
|
||||
yield self._extract_episode_info(urljoin(url, episode['url']), episode)
|
||||
|
||||
html = self._download_webpage(url, program_id)
|
||||
program_title = self._extract_meta_from_class_elements([
|
||||
@@ -446,7 +351,7 @@ def entries():
|
||||
'tAudioProgramMain__info', # /shows/audio/programs/
|
||||
'p-program-description'], html) # /tv/
|
||||
|
||||
return self.playlist_result(entries(), program_id, program_title, program_description)
|
||||
return self.playlist_result(entries(), program_id, program_title, program_description, series=program_title)
|
||||
|
||||
|
||||
class NhkForSchoolBangumiIE(InfoExtractor):
|
||||
|
||||
37
yt_dlp/extractor/nowcanal.py
Normal file
37
yt_dlp/extractor/nowcanal.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NowCanalIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowcanal\.pt(?:/[\w-]+)+/detalhe/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nowcanal.pt/ultimas/detalhe/pedro-sousa-hjulmand-pode-ter-uma-saida-limpa-do-sporting-daqui-a-um-ano',
|
||||
'md5': '047f17cb783e66e467d703e704bbc95d',
|
||||
'info_dict': {
|
||||
'id': '6376598467112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pedro Sousa «Hjulmand pode ter uma saída limpa do Sporting daqui a um ano»',
|
||||
'description': '',
|
||||
'uploader_id': '6108484330001',
|
||||
'duration': 65.237,
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'timestamp': 1754440620,
|
||||
'upload_date': '20250806',
|
||||
'tags': ['now'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.nowcanal.pt/programas/frente-a-frente/detalhe/frente-a-frente-eva-cruzeiro-ps-e-rita-matias-chega',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BC_URL_TMPL = 'https://players.brightcove.net/6108484330001/chhIqzukMq_default/index.html?videoId={}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_json(
|
||||
r'videoHandler\.addBrightcoveVideoWithJson\(\[',
|
||||
webpage, 'video data', display_id)['brightcoveVideoId']
|
||||
|
||||
return self.url_result(self._BC_URL_TMPL.format(video_id), BrightcoveNewIE)
|
||||
@@ -1,17 +1,40 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NTVRuIE(InfoExtractor):
|
||||
IE_NAME = 'ntv.ru'
|
||||
_VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/#?]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# JSON Api is geo restricted
|
||||
'url': 'https://www.ntv.ru/peredacha/svoya_igra/m58980/o818800',
|
||||
'md5': '818962a1b52747d446db7cd5be43e142',
|
||||
'info_dict': {
|
||||
'id': '2520563',
|
||||
'ext': 'mp4',
|
||||
'title': 'Участники: Ирина Петрова, Сергей Коновалов, Кристина Кораблина',
|
||||
'description': 'md5:fcbd21cd45238a940b95550f9e178e3e',
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 2462,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['игры и игрушки'],
|
||||
'timestamp': 1761821096,
|
||||
'upload_date': '20251030',
|
||||
'release_timestamp': 1761821096,
|
||||
'release_date': '20251030',
|
||||
'modified_timestamp': 1761821096,
|
||||
'modified_date': '20251030',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/novosti/863142/',
|
||||
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
|
||||
'info_dict': {
|
||||
@@ -22,31 +45,35 @@ class NTVRuIE(InfoExtractor):
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 136,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['ВМС', 'захват', 'митинги', 'Севастополь', 'Украина'],
|
||||
'timestamp': 1395222013,
|
||||
'upload_date': '20140319',
|
||||
'release_timestamp': 1395222013,
|
||||
'release_date': '20140319',
|
||||
'modified_timestamp': 1395222013,
|
||||
'modified_date': '20140319',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||
'md5': 'adecff79691b4d71e25220a191477124',
|
||||
'info_dict': {
|
||||
'id': '750370',
|
||||
'ext': 'mp4',
|
||||
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 172,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Requires unescapeHTML
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
'info_dict': {
|
||||
'id': '747480',
|
||||
'ext': 'mp4',
|
||||
'title': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'description': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'title': '"Сегодня". 21 марта 2014 года. 16:00 ',
|
||||
'description': 'md5:bed80745ca72af557433195f51a02785',
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 1496,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['Брюссель', 'гражданство', 'ЕС', 'Крым', 'ОСАГО', 'саммит', 'санкции', 'события', 'чиновники', 'рейтинг'],
|
||||
'timestamp': 1395406951,
|
||||
'upload_date': '20140321',
|
||||
'release_timestamp': 1395406951,
|
||||
'release_date': '20140321',
|
||||
'modified_timestamp': 1395406951,
|
||||
'modified_date': '20140321',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
|
||||
@@ -54,11 +81,19 @@ class NTVRuIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1126480',
|
||||
'ext': 'mp4',
|
||||
'title': 'Остросюжетный фильм «Кома»',
|
||||
'description': 'Остросюжетный фильм «Кома»',
|
||||
'title': 'Остросюжетный фильм "Кома"',
|
||||
'description': 'md5:e79ffd0887425a0f05a58885c408d7d8',
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 5592,
|
||||
'duration': 5608,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['кино'],
|
||||
'timestamp': 1432868572,
|
||||
'upload_date': '20150529',
|
||||
'release_timestamp': 1432868572,
|
||||
'release_date': '20150529',
|
||||
'modified_timestamp': 1432868572,
|
||||
'modified_date': '20150529',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||
@@ -66,11 +101,19 @@ class NTVRuIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '751482',
|
||||
'ext': 'mp4',
|
||||
'title': '«Дело врачей»: «Деревце жизни»',
|
||||
'description': '«Дело врачей»: «Деревце жизни»',
|
||||
'title': '"Дело врачей": "Деревце жизни"',
|
||||
'description': 'md5:d6fbf9193f880f50d9cbfbcc954161c1',
|
||||
'thumbnail': r're:^http://.*\.jpg',
|
||||
'duration': 2590,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['врачи', 'больницы'],
|
||||
'timestamp': 1395882300,
|
||||
'upload_date': '20140327',
|
||||
'release_timestamp': 1395882300,
|
||||
'release_date': '20140327',
|
||||
'modified_timestamp': 1395882300,
|
||||
'modified_date': '20140327',
|
||||
},
|
||||
}, {
|
||||
# Schemeless file URL
|
||||
@@ -78,48 +121,26 @@ class NTVRuIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._og_search_property(
|
||||
('video', 'video:iframe'), webpage, default=None)
|
||||
if video_url:
|
||||
video_id = self._search_regex(
|
||||
r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
|
||||
video_url, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
video_id = self._html_search_regex(
|
||||
r'<meta property="ya:ovs:feed_url" content="https?://www\.ntv\.ru/(?:exp/)?video/(\d+)', webpage, 'video id')
|
||||
|
||||
player = self._download_xml(
|
||||
f'http://www.ntv.ru/vi{video_id}/',
|
||||
video_id, 'Downloading video XML')
|
||||
|
||||
title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
|
||||
|
||||
video = player.find('./data/video')
|
||||
|
||||
formats = []
|
||||
for format_id in ['', 'hi', 'webm']:
|
||||
file_ = xpath_text(video, f'./{format_id}file')
|
||||
if not file_:
|
||||
video_url = url_or_none(xpath_text(video, f'./{format_id}file'))
|
||||
if not video_url:
|
||||
continue
|
||||
if file_.startswith('//'):
|
||||
file_ = self._proto_relative_url(file_)
|
||||
elif not file_.startswith('http'):
|
||||
file_ = 'http://media.ntv.ru/vod/' + file_
|
||||
formats.append({
|
||||
'url': file_,
|
||||
'url': video_url,
|
||||
'filesize': int_or_none(xpath_text(video, f'./{format_id}size')),
|
||||
})
|
||||
hls_manifest = xpath_text(video, './playback/hls')
|
||||
@@ -131,12 +152,28 @@ def _real_extract(self, url):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
dash_manifest, video_id, mpd_id='dash', fatal=False))
|
||||
|
||||
metadata = self._download_xml(
|
||||
f'https://www.ntv.ru/exp/video/{video_id}', video_id, 'Downloading XML metadata', fatal=False)
|
||||
|
||||
return {
|
||||
'id': xpath_text(video, './id'),
|
||||
'title': title,
|
||||
'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
|
||||
'thumbnail': xpath_text(video, './splash'),
|
||||
'duration': int_or_none(xpath_text(video, './totaltime')),
|
||||
'view_count': int_or_none(xpath_text(video, './views')),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player, {
|
||||
'title': ('data/title/text()', ..., {str}, {unescapeHTML}, any),
|
||||
'description': ('data/description/text()', ..., {str}, {unescapeHTML}, any),
|
||||
'duration': ('data/video/totaltime/text()', ..., {int_or_none}, any),
|
||||
'view_count': ('data/video/views/text()', ..., {int_or_none}, any),
|
||||
'thumbnail': ('data/video/splash/text()', ..., {url_or_none}, any),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('{*}title/text()', ..., {str}, {unescapeHTML}, any),
|
||||
'description': ('{*}description/text()', ..., {str}, {unescapeHTML}, any),
|
||||
'duration': ('{*}duration/text()', ..., {int_or_none}, any),
|
||||
'timestamp': ('{*}create_date/text()', ..., {parse_iso8601}, any),
|
||||
'release_timestamp': ('{*}upload_date/text()', ..., {parse_iso8601}, any),
|
||||
'modified_timestamp': ('{*}modify_date/text()', ..., {parse_iso8601}, any),
|
||||
'tags': ('{*}tag/text()', ..., {str}, {lambda x: x.split(',')}, ..., {str.strip}, filter),
|
||||
'view_count': ('{*}stats/views_total/text()', ..., {int_or_none}, any),
|
||||
'comment_count': ('{*}stats/comments/text()', ..., {int_or_none}, any),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -598,7 +598,8 @@ def _real_extract(self, url):
|
||||
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
|
||||
if not campaign_id:
|
||||
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
|
||||
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))
|
||||
((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
|
||||
'id', {str}, any, {require('campaign ID')}))
|
||||
|
||||
params = {
|
||||
'json-api-use-default-includes': 'false',
|
||||
|
||||
@@ -3,12 +3,14 @@
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RinseFMBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://rinse.fm/api/query/v1'
|
||||
|
||||
@staticmethod
|
||||
def _parse_entry(entry):
|
||||
return {
|
||||
@@ -45,8 +47,10 @@ class RinseFMIE(RinseFMBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
entry = self._download_json(
|
||||
f'{self._API_BASE}/episodes/{display_id}', display_id,
|
||||
note='Downloading episode data from API')['entry']
|
||||
|
||||
return self._parse_entry(entry)
|
||||
|
||||
@@ -58,32 +62,35 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'resources',
|
||||
'title': '[re]sources',
|
||||
'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
|
||||
'description': 'md5:fd6a7254e8273510e6d49fbf50edf392',
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
}, {
|
||||
'url': 'https://rinse.fm/shows/ivy/',
|
||||
'url': 'https://www.rinse.fm/shows/esk',
|
||||
'info_dict': {
|
||||
'id': 'ivy',
|
||||
'title': '[IVY]',
|
||||
'description': 'A dedicated space for DNB/Turbo House and 4x4.',
|
||||
'id': 'esk',
|
||||
'title': 'Esk',
|
||||
'description': 'md5:5893d7c1d411ae8dea7fba12f109aa98',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _entries(self, data):
|
||||
for episode in traverse_obj(data, (
|
||||
'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
):
|
||||
yield self._parse_entry(episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
data = self._search_nextjs_data(webpage, playlist_id)
|
||||
|
||||
api_data = self._download_json(
|
||||
f'{self._API_BASE}/shows/{playlist_id}', playlist_id,
|
||||
note='Downloading show data from API')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), playlist_id, title, description=description)
|
||||
self._entries(api_data), playlist_id,
|
||||
**traverse_obj(api_data, ('entry', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
})))
|
||||
|
||||
@@ -15,14 +15,15 @@ class S4CIE(InfoExtractor):
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.s4c.cymru/clic/programme/856636948',
|
||||
# Geo restricted to the UK
|
||||
'url': 'https://www.s4c.cymru/clic/programme/886303048',
|
||||
'info_dict': {
|
||||
'id': '856636948',
|
||||
'id': '886303048',
|
||||
'ext': 'mp4',
|
||||
'title': 'Am Dro',
|
||||
'title': 'Pennod 1',
|
||||
'description': 'md5:7e3f364b70f61fcdaa8b4cb4a3eb3e7a',
|
||||
'duration': 2880,
|
||||
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Stad_2025S4C_P1_210053.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -51,7 +52,7 @@ def _real_extract(self, url):
|
||||
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
|
||||
'mode': 'od',
|
||||
'application': 'clic',
|
||||
'region': 'WW',
|
||||
'region': 'UK' if player_config.get('application') == 's4chttpl' else 'WW',
|
||||
'extra': 'false',
|
||||
'thirdParty': 'false',
|
||||
'filename': player_config['filename'],
|
||||
|
||||
@@ -1064,7 +1064,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?:soundcloud(?:%3A|:)playlists(?:%3A|:))?(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.soundcloud.com/playlists/4110309',
|
||||
@@ -1079,6 +1079,12 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud%3Aplaylists%3A1759227795',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud:playlists:2104769627?secret_token=s-wmpCLuExeYX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,10 +8,11 @@
|
||||
|
||||
|
||||
class SportDeutschlandIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
|
||||
IE_NAME = 'sporteurope'
|
||||
_VALID_URL = r'https?://(?:player\.)?sporteurope\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
|
||||
_TESTS = [{
|
||||
# Single-part video, direct link
|
||||
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
|
||||
'url': 'https://sporteurope.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
|
||||
'md5': '35c11a19395c938cdd076b93bda54cde',
|
||||
'info_dict': {
|
||||
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
|
||||
@@ -19,9 +20,9 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
|
||||
'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
|
||||
'channel': 'Rostock Griffins',
|
||||
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
|
||||
'channel_url': 'https://sporteurope.tv/rostock-griffins',
|
||||
'live_status': 'was_live',
|
||||
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
|
||||
'description': r're:Video-Livestream des Spiels Rostock Griffins vs\. Elmshorn Fighting Pirates.+',
|
||||
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
|
||||
'timestamp': 1749913117,
|
||||
'upload_date': '20250614',
|
||||
@@ -29,16 +30,16 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# Single-part video, embedded player link
|
||||
'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
|
||||
'url': 'https://player.sporteurope.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
|
||||
'info_dict': {
|
||||
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
|
||||
'ext': 'mp4',
|
||||
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
|
||||
'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc',
|
||||
'channel': 'Rostock Griffins',
|
||||
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
|
||||
'channel_url': 'https://sporteurope.tv/rostock-griffins',
|
||||
'live_status': 'was_live',
|
||||
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
|
||||
'description': r're:Video-Livestream des Spiels Rostock Griffins vs\. Elmshorn Fighting Pirates.+',
|
||||
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
|
||||
'timestamp': 1749913117,
|
||||
'upload_date': '20250614',
|
||||
@@ -47,7 +48,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
# Multi-part video
|
||||
'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
|
||||
'url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
|
||||
'info_dict': {
|
||||
'id': '9f63d737-2444-4e3a-a1ea-840df73fd481',
|
||||
'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
|
||||
@@ -55,7 +56,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'description': 'md5:0a17da15e48a687e6019639c3452572b',
|
||||
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
|
||||
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
|
||||
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
@@ -66,7 +67,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1',
|
||||
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
|
||||
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
|
||||
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'duration': 14773.0,
|
||||
'timestamp': 1753085197,
|
||||
'upload_date': '20250721',
|
||||
@@ -79,16 +80,17 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2',
|
||||
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
|
||||
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
|
||||
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
|
||||
'duration': 14773.0,
|
||||
'timestamp': 1753128421,
|
||||
'upload_date': '20250721',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}],
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Livestream
|
||||
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
|
||||
'url': 'https://sporteurope.tv/dtb/gymnastik-international-tag-1',
|
||||
'info_dict': {
|
||||
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
|
||||
'ext': 'mp4',
|
||||
@@ -96,7 +98,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'display_id': 'dtb/gymnastik-international-tag-1',
|
||||
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
|
||||
'channel': 'Deutscher Turner-Bund',
|
||||
'channel_url': 'https://sportdeutschland.tv/dtb',
|
||||
'channel_url': 'https://sporteurope.tv/dtb',
|
||||
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
@@ -106,9 +108,9 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
def _process_video(self, asset_id, video):
|
||||
is_live = video['type'] == 'mux_live'
|
||||
token = self._download_json(
|
||||
f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}',
|
||||
f'https://api.sporteurope.tv/api/web/personal/asset-token/{asset_id}',
|
||||
video['id'], query={'type': video['type'], 'playback_id': video['src']},
|
||||
headers={'Referer': 'https://sportdeutschland.tv/'})['token']
|
||||
headers={'Referer': 'https://sporteurope.tv/'})['token']
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live)
|
||||
|
||||
@@ -126,7 +128,7 @@ def _process_video(self, asset_id, video):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
f'https://api.sportdeutschland.tv/api/stateless/frontend/assets/{display_id}',
|
||||
f'https://api.sporteurope.tv/api/stateless/frontend/assets/{display_id}',
|
||||
display_id, query={'access_token': 'true'})
|
||||
|
||||
info = {
|
||||
@@ -139,7 +141,7 @@ def _real_extract(self, url):
|
||||
'channel_id': ('profile', 'id'),
|
||||
'is_live': 'currently_live',
|
||||
'was_live': 'was_live',
|
||||
'channel_url': ('profile', 'slug', {lambda x: f'https://sportdeutschland.tv/{x}'}),
|
||||
'channel_url': ('profile', 'slug', {lambda x: f'https://sporteurope.tv/{x}'}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
|
||||
@@ -101,8 +101,8 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
|
||||
data = self._search_json(
|
||||
r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info',
|
||||
video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||
r'(?:window\.|(?:var|const|let)\s+)(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage,
|
||||
'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||
transform_source=lambda x: base64.b64decode(x).decode())
|
||||
|
||||
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .zype import ZypeIE
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
try_call,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ThisOldHouseIE(InfoExtractor):
|
||||
@@ -77,46 +76,43 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
|
||||
urlh = self._request_webpage(
|
||||
'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
|
||||
errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
|
||||
login_page = self._download_webpage(
|
||||
'https://www.thisoldhouse.com/insider-login', None, 'Downloading login page')
|
||||
hidden_inputs = self._hidden_inputs(login_page)
|
||||
response = self._download_json(
|
||||
'https://www.thisoldhouse.com/wp-admin/admin-ajax.php', None, 'Logging in',
|
||||
headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=urlencode_postdata(filter_dict({
|
||||
'action': 'onebill_subscriber_login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'pricingPlanTerm': hidden_inputs['pricing_plan_term'],
|
||||
'utm_parameters': hidden_inputs.get('utm_parameters'),
|
||||
'nonce': hidden_inputs['mdcr_onebill_login_nonce'],
|
||||
})))
|
||||
|
||||
try:
|
||||
auth_form = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Submitting credentials', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': urlh.url,
|
||||
}, data=json.dumps(filter_dict({
|
||||
**{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
|
||||
'tenant': 'thisoldhouse',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'popup_options': {},
|
||||
'sso': True,
|
||||
'_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
|
||||
'_intstate': 'deprecated',
|
||||
}), separators=(',', ':')).encode())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
message = traverse_obj(response, ('data', 'message', {str}))
|
||||
if not response['success']:
|
||||
if message and 'Something went wrong' in message:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(
|
||||
'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
|
||||
data=urlencode_postdata(self._hidden_inputs(auth_form)))
|
||||
raise ExtractorError(message or 'Login was unsuccessful')
|
||||
if message and 'Your subscription is not active' in message:
|
||||
self.report_warning(
|
||||
f'{self.IE_NAME} said your subscription is not active. '
|
||||
f'If your subscription is active, this could be caused by too many sign-ins, '
|
||||
f'and you should instead try using {self._login_hint(method="cookies")[4:]}')
|
||||
else:
|
||||
self.write_debug(f'{self.IE_NAME} said: {message}')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if 'To Unlock This content' in webpage:
|
||||
self.raise_login_required(
|
||||
'This video is only available for subscribers. '
|
||||
'Note that --cookies-from-browser may not work due to this site using session cookies')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
# If login response says inactive subscription, site redirects to frontpage for Insider content
|
||||
if 'To Unlock This content' in webpage or urllib.parse.urlparse(urlh.url).path in ('', '/'):
|
||||
self.raise_login_required('This video is only available for subscribers')
|
||||
|
||||
video_url, video_id = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||
|
||||
@@ -136,8 +136,10 @@ class TubeTuGrazIE(TubeTuGrazBaseIE):
|
||||
IE_DESC = 'tube.tugraz.at'
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
|
||||
(?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
|
||||
https?://tube\.tugraz\.at/(?:
|
||||
paella/ui/watch\.html\?(?:[^#]*&)?id=|
|
||||
portal/watch/
|
||||
)(?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -149,9 +151,9 @@ class TubeTuGrazIE(TubeTuGrazBaseIE):
|
||||
'title': '#6 (23.11.2017)',
|
||||
'episode': '#6 (23.11.2017)',
|
||||
'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
|
||||
'creator': 'Safran C',
|
||||
'duration': 3295818,
|
||||
'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
|
||||
'creators': ['Safran C'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
|
||||
@@ -162,6 +164,10 @@ class TubeTuGrazIE(TubeTuGrazBaseIE):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'expected_warnings': ['Extractor failed to obtain "title"'],
|
||||
}, {
|
||||
# Portal URL format
|
||||
'url': 'https://tube.tugraz.at/portal/watch/ab28ec60-8cbe-4f1a-9b96-a95add56c612',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -182,13 +182,13 @@ def _entries(self, show_url, playlist_id, selected_season):
|
||||
webpage = self._download_webpage(show_url, playlist_id)
|
||||
|
||||
data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)['video']
|
||||
r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)['queries'][0]['state']['data']
|
||||
|
||||
# v['number'] is already a decimal string, but stringify to protect against API changes
|
||||
path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]
|
||||
|
||||
for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)):
|
||||
for season in traverse_obj(data, ('seasons', *path)):
|
||||
season_number = int_or_none(season.get('number'))
|
||||
for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
|
||||
episode_id = episode['id']
|
||||
|
||||
@@ -41,16 +41,16 @@ class TwitchBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
_OPERATION_HASHES = {
|
||||
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
|
||||
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
|
||||
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
|
||||
'ShareClipRenderStatus': 'e0a46b287d760c6890a39d1ccd736af5ec9479a267d02c710e9ac33326b651d2',
|
||||
'ChannelCollectionsContent': '447aec6a0cc1e8d0a8d7732d47eb0762c336a2294fdb009e9c9d854e49d484b9',
|
||||
'StreamMetadata': 'a647c2a13599e5991e175155f798ca7f1ecddde73f7f341f39009c14dbf59962',
|
||||
'CollectionSideBar': '016e1e4ccee0eb4698eb3bf1a04dc1c077fb746c78c82bac9a8f0289658fbd1a',
|
||||
'FilterableVideoTower_Videos': '67004f7881e65c297936f32c75246470629557a393788fb5a69d6d9a25a8fd5f',
|
||||
'ClipsCards__User': '90c33f5e6465122fba8f9371e2a97076f9ed06c6fed3788d002ab9eba8f91d88',
|
||||
'ShareClipRenderStatus': '1844261bb449fa51e6167040311da4a7a5f1c34fe71c71a3e0c4f551bc30c698',
|
||||
'ChannelCollectionsContent': '5247910a19b1cd2b760939bf4cba4dcbd3d13bdf8c266decd16956f6ef814077',
|
||||
'StreamMetadata': 'b57f9b910f8cd1a4659d894fe7550ccc81ec9052c01e438b290fd66a040b9b93',
|
||||
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
|
||||
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
|
||||
'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
|
||||
'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
|
||||
'VideoPreviewOverlay': '9515480dee68a77e667cb19de634739d33f243572b007e98e67184b1a5d8369f',
|
||||
'VideoMetadata': '45111672eea2e507f8ba44d101a61862f9c56b11dee09a15634cb75cb9b9084d',
|
||||
'VideoPlayer_ChapterSelectButtonVideo': '71835d5ef425e154bf282453a926d99b328cdc5e32f36d3a209d0f4778b41203',
|
||||
'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
|
||||
}
|
||||
|
||||
@@ -621,15 +621,15 @@ def _make_video_result(node):
|
||||
|
||||
|
||||
class TwitchCollectionIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:collection'
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
|
||||
'url': 'https://www.twitch.tv/collections/o9zZer3IQBhTJw',
|
||||
'info_dict': {
|
||||
'id': 'wlDCoH0zEBZZbQ',
|
||||
'title': 'Overthrow Nook, capitalism for children',
|
||||
'id': 'o9zZer3IQBhTJw',
|
||||
'title': 'Playthrough Archives',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
'playlist_mincount': 21,
|
||||
}]
|
||||
|
||||
_OPERATION_NAME = 'CollectionSideBar'
|
||||
@@ -720,8 +720,8 @@ def _make_variables(channel_name, broadcast_type, sort):
|
||||
|
||||
|
||||
class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
IE_NAME = 'twitch:videos'
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
|
||||
|
||||
_TESTS = [{
|
||||
# All Videos sorted by Date
|
||||
'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
|
||||
@@ -729,7 +729,7 @@ class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
'id': 'spamfish',
|
||||
'title': 'spamfish - All Videos sorted by Date',
|
||||
},
|
||||
'playlist_mincount': 924,
|
||||
'playlist_mincount': 751,
|
||||
}, {
|
||||
# All Videos sorted by Popular
|
||||
'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
|
||||
@@ -737,8 +737,9 @@ class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
'id': 'spamfish',
|
||||
'title': 'spamfish - All Videos sorted by Popular',
|
||||
},
|
||||
'playlist_mincount': 931,
|
||||
'playlist_mincount': 754,
|
||||
}, {
|
||||
# TODO: Investigate why we get 0 entries
|
||||
# Past Broadcasts sorted by Date
|
||||
'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
|
||||
'info_dict': {
|
||||
@@ -753,8 +754,9 @@ class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
'id': 'spamfish',
|
||||
'title': 'spamfish - Highlights sorted by Date',
|
||||
},
|
||||
'playlist_mincount': 901,
|
||||
'playlist_mincount': 751,
|
||||
}, {
|
||||
# TODO: Investigate why we get 0 entries
|
||||
# Uploads sorted by Date
|
||||
'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
|
||||
'info_dict': {
|
||||
@@ -763,6 +765,7 @@ class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
# TODO: Investigate why we get 0 entries
|
||||
# Past Premieres sorted by Date
|
||||
'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
|
||||
'info_dict': {
|
||||
@@ -825,8 +828,8 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
|
||||
IE_NAME = 'twitch:videos:clips'
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
|
||||
|
||||
_TESTS = [{
|
||||
# Clips
|
||||
'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
|
||||
@@ -898,8 +901,8 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
|
||||
IE_NAME = 'twitch:videos:collections'
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
|
||||
|
||||
_TESTS = [{
|
||||
# Collections
|
||||
'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
|
||||
@@ -1050,7 +1053,10 @@ def _real_extract(self, url):
|
||||
gql = self._download_gql(
|
||||
channel_name, [{
|
||||
'operationName': 'StreamMetadata',
|
||||
'variables': {'channelLogin': channel_name},
|
||||
'variables': {
|
||||
'channelLogin': channel_name,
|
||||
'includeIsDJ': True,
|
||||
},
|
||||
}, {
|
||||
'operationName': 'ComscoreStreamingQuery',
|
||||
'variables': {
|
||||
|
||||
@@ -7,14 +7,15 @@
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class URPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
|
||||
'md5': '5ba36643c77cc3d34ffeadad89937d1e',
|
||||
'info_dict': {
|
||||
'id': '203704',
|
||||
'ext': 'mp4',
|
||||
@@ -25,11 +26,12 @@ class URPlayIE(InfoExtractor):
|
||||
'upload_date': '20171214',
|
||||
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
||||
'duration': 2269,
|
||||
'categories': ['Vetenskap & teknik'],
|
||||
'categories': ['Kultur & historia'],
|
||||
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||
'age_limit': 15,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv',
|
||||
'info_dict': {
|
||||
@@ -48,6 +50,7 @@ class URPlayIE(InfoExtractor):
|
||||
'tags': 'count:7',
|
||||
'episode': 'Mitt barn skadar sig själv',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||
'info_dict': {
|
||||
@@ -67,6 +70,27 @@ class URPlayIE(InfoExtractor):
|
||||
'episode': 'Sovkudde',
|
||||
'season': 'Säsong 1',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Only accessible through new media api
|
||||
'url': 'https://urplay.se/program/242932-vulkanernas-krafter-fran-kraftfull-till-forgorande',
|
||||
'info_dict': {
|
||||
'id': '242932',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanernas krafter : Från kraftfull till förgörande',
|
||||
'description': 'md5:742bb87048e7d5a7f209d28f9bb70ab1',
|
||||
'age_limit': 15,
|
||||
'duration': 2613,
|
||||
'thumbnail': 'https://assets.ur.se/id/242932/images/1_hd.jpg',
|
||||
'categories': ['Vetenskap & teknik'],
|
||||
'tags': ['Geofysik', 'Naturvetenskap', 'Vulkaner', 'Vulkanutbrott'],
|
||||
'series': 'Vulkanernas krafter',
|
||||
'episode': 'Från kraftfull till förgörande',
|
||||
'episode_number': 2,
|
||||
'timestamp': 1763514000,
|
||||
'upload_date': '20251119',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
|
||||
'only_matching': True,
|
||||
@@ -78,7 +102,7 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
urplayer_data = self._search_nextjs_data(webpage, video_id, fatal=False) or {}
|
||||
if urplayer_data:
|
||||
urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
|
||||
urplayer_data = traverse_obj(urplayer_data, ('props', 'pageProps', 'productData', {dict}))
|
||||
if not urplayer_data:
|
||||
raise ExtractorError('Unable to parse __NEXT_DATA__')
|
||||
else:
|
||||
@@ -87,21 +111,12 @@ def _real_extract(self, url):
|
||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
||||
episode = urplayer_data['title']
|
||||
|
||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||
formats = []
|
||||
urplayer_streams = urplayer_data.get('streamingInfo', {})
|
||||
|
||||
for k, v in urplayer_streams.get('raw', {}).items():
|
||||
if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)):
|
||||
continue
|
||||
file_http = v.get('location')
|
||||
if file_http:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
f'http://{host}/{file_http}playlist.m3u8',
|
||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||
|
||||
subtitles = {}
|
||||
sources = self._download_json(
|
||||
f'https://media-api.urplay.se/config-streaming/v1/urplay/sources/{video_id}', video_id,
|
||||
note='Downloading streaming information')
|
||||
hls_url = traverse_obj(sources, ('sources', 'hls', {url_or_none}, {require('HLS URL')}))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
def parse_lang_code(code):
|
||||
"3-character language code or None (utils candidate)"
|
||||
|
||||
@@ -339,11 +339,20 @@ class WistiaChannelIE(WistiaBaseIE):
|
||||
'title': 'The Roof S2: The Modern CRO',
|
||||
'thumbnail': r're:https?://embed(?:-ssl)?\.wistia\.com/.+\.(?:jpg|png)',
|
||||
'duration': 86.487,
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season. ',
|
||||
'timestamp': 1619790290,
|
||||
'upload_date': '20210430',
|
||||
},
|
||||
'params': {'noplaylist': True, 'skip_download': True},
|
||||
}, {
|
||||
# Channel with episodes structure instead of videos
|
||||
'url': 'https://fast.wistia.net/embed/channel/sapab9p6qd',
|
||||
'info_dict': {
|
||||
'id': 'sapab9p6qd',
|
||||
'title': 'Credo: An RCIA Program',
|
||||
'description': '\n',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.profitwell.com/recur/boxed-out',
|
||||
@@ -399,8 +408,7 @@ def _real_extract(self, url):
|
||||
|
||||
entries = [
|
||||
self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
|
||||
for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
|
||||
if video.get('hashedId')
|
||||
for video in traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
|
||||
]
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import base64
|
||||
import codecs
|
||||
import itertools
|
||||
import re
|
||||
import string
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -16,7 +14,6 @@
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_call,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
@@ -32,7 +29,7 @@ def __init__(self, algo_id, seed):
|
||||
try:
|
||||
self._algorithm = getattr(self, f'_algo{algo_id}')
|
||||
except AttributeError:
|
||||
raise ExtractorError(f'Unknown algorithm ID: {algo_id}')
|
||||
raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
|
||||
self._s = to_signed_32(seed)
|
||||
|
||||
def _algo1(self, s):
|
||||
@@ -60,6 +57,37 @@ def _algo3(self, s):
|
||||
s = to_signed_32(s * to_signed_32(0xc2b2ae3d))
|
||||
return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
|
||||
|
||||
def _algo4(self, s):
|
||||
# Custom scrambling function involving a left rotation (ROL)
|
||||
s = self._s = to_signed_32(s + 0x6d2b79f5)
|
||||
s = to_signed_32((s << 7) | ((s & 0xFFFFFFFF) >> 25)) # ROL 7
|
||||
s = to_signed_32(s + 0x9e3779b9)
|
||||
s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 11))
|
||||
return to_signed_32(s * 0x27d4eb2d)
|
||||
|
||||
def _algo5(self, s):
|
||||
# xorshift variant with a final addition
|
||||
s = to_signed_32(s ^ (s << 7))
|
||||
s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 9))
|
||||
s = to_signed_32(s ^ (s << 8))
|
||||
s = self._s = to_signed_32(s + 0xa5a5a5a5)
|
||||
return s
|
||||
|
||||
def _algo6(self, s):
|
||||
# LCG (a=0x2c9277b5, c=0xac564b05) with a variable right shift scrambler
|
||||
s = self._s = to_signed_32(s * to_signed_32(0x2c9277b5) + to_signed_32(0xac564b05))
|
||||
s2 = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 18))
|
||||
shift = (s & 0xFFFFFFFF) >> 27 & 31
|
||||
return to_signed_32((s2 & 0xFFFFFFFF) >> shift)
|
||||
|
||||
def _algo7(self, s):
|
||||
# Weyl Sequence (k=0x9e3779b9) + custom multiply-xor-shift mixing function
|
||||
s = self._s = to_signed_32(s + to_signed_32(0x9e3779b9))
|
||||
e = to_signed_32(s ^ (s << 5))
|
||||
e = to_signed_32(e * to_signed_32(0x7feb352d))
|
||||
e = to_signed_32(e ^ ((e & 0xFFFFFFFF) >> 15))
|
||||
return to_signed_32(e * to_signed_32(0x846ca68b))
|
||||
|
||||
def __next__(self):
|
||||
return self._algorithm(self._s) & 0xFF
|
||||
|
||||
@@ -185,32 +213,28 @@ class XHamsterIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_XOR_KEY = b'xh7999'
|
||||
|
||||
def _decipher_format_url(self, format_url, format_id):
|
||||
if all(char in string.hexdigits for char in format_url):
|
||||
byte_data = bytes.fromhex(format_url)
|
||||
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
||||
byte_gen = _ByteGenerator(byte_data[0], seed)
|
||||
return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
||||
parsed_url = urllib.parse.urlparse(format_url)
|
||||
|
||||
cipher_type, _, ciphertext = try_call(
|
||||
lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3
|
||||
|
||||
if not cipher_type or not ciphertext:
|
||||
self.report_warning(f'Skipping format "{format_id}": failed to decipher URL')
|
||||
hex_string, path_remainder = self._search_regex(
|
||||
r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
|
||||
default=(None, None), group=('hex', 'rem'))
|
||||
if not hex_string:
|
||||
self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
|
||||
return None
|
||||
|
||||
if cipher_type == 'xor':
|
||||
return bytes(
|
||||
a ^ b for a, b in
|
||||
zip(ciphertext.encode(), itertools.cycle(self._XOR_KEY))).decode()
|
||||
byte_data = bytes.fromhex(hex_string)
|
||||
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
||||
|
||||
if cipher_type == 'rot13':
|
||||
return codecs.decode(ciphertext, cipher_type)
|
||||
try:
|
||||
byte_gen = _ByteGenerator(byte_data[0], seed)
|
||||
except ExtractorError as e:
|
||||
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
|
||||
return None
|
||||
|
||||
self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"')
|
||||
return None
|
||||
deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
||||
|
||||
return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl()
|
||||
|
||||
def _fixup_formats(self, formats):
|
||||
for f in formats:
|
||||
@@ -333,8 +357,11 @@ def get_height(s):
|
||||
'height': get_height(quality),
|
||||
'filesize': format_sizes.get(quality),
|
||||
'http_headers': {
|
||||
'Referer': standard_url,
|
||||
'Referer': urlh.url,
|
||||
},
|
||||
# HTTP formats return "Wrong key" error even when deciphered by site JS
|
||||
# TODO: Remove this when resolved on the site's end
|
||||
'__needs_testing': True,
|
||||
})
|
||||
|
||||
categories_list = video.get('categories')
|
||||
@@ -371,7 +398,8 @@ def get_height(s):
|
||||
'age_limit': age_limit if age_limit is not None else 18,
|
||||
'categories': categories,
|
||||
'formats': self._fixup_formats(formats),
|
||||
'_format_sort_fields': ('res', 'proto', 'tbr'),
|
||||
# TODO: Revert to ('res', 'proto', 'tbr') when HTTP formats problem is resolved
|
||||
'_format_sort_fields': ('res', 'proto:m3u8', 'tbr'),
|
||||
}
|
||||
|
||||
# Old layout fallback
|
||||
|
||||
67
yt_dlp/extractor/yfanefa.py
Normal file
67
yt_dlp/extractor/yfanefa.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YfanefaIE(InfoExtractor):
|
||||
IE_NAME = 'yfanefa'
|
||||
_VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.yfanefa.com/record/2717',
|
||||
'info_dict': {
|
||||
'id': 'record-2717',
|
||||
'ext': 'mp4',
|
||||
'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
|
||||
'duration': 5239,
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.yfanefa.com/news/53',
|
||||
'info_dict': {
|
||||
'id': 'news-53',
|
||||
'ext': 'mp4',
|
||||
'title': 'Memory Bank: Bradford Launch',
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.yfanefa.com/evaluating_nature_matters',
|
||||
'info_dict': {
|
||||
'id': 'evaluating_nature_matters',
|
||||
'ext': 'mp4',
|
||||
'title': 'Evaluating Nature Matters',
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_data = self._search_json(
|
||||
r'iwPlayer\.options\["[\w.]+"\]\s*=', webpage, 'player options', video_id)
|
||||
|
||||
formats = []
|
||||
video_url = join_nonempty(player_data['url'], player_data.get('signature'), delim='')
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url, 'ext': 'mp4'}]
|
||||
|
||||
return {
|
||||
'id': video_id.strip('/').replace('/', '-'),
|
||||
'title':
|
||||
self._og_search_title(webpage, default=None)
|
||||
or remove_end(self._html_extract_title(webpage), ' | Yorkshire Film Archive'),
|
||||
'formats': formats,
|
||||
**traverse_obj(player_data, {
|
||||
'thumbnail': ('preview', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
@@ -104,6 +104,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
},
|
||||
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
|
||||
@@ -117,6 +118,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
},
|
||||
'web_embedded': {
|
||||
@@ -157,6 +159,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
),
|
||||
},
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
},
|
||||
# This client now requires sign-in for every video
|
||||
'web_creator': {
|
||||
@@ -313,6 +316,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
),
|
||||
},
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
|
||||
},
|
||||
'tv': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
@@ -327,6 +331,17 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
# See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
|
||||
'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
|
||||
},
|
||||
'tv_downgraded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5',
|
||||
'clientVersion': '5.20251105',
|
||||
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
'tv_simply': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
@@ -380,11 +395,15 @@ def short_client_name(client_name):
|
||||
return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
|
||||
|
||||
|
||||
def build_innertube_clients():
|
||||
THIRD_PARTY = {
|
||||
def _fix_embedded_ytcfg(ytcfg):
|
||||
ytcfg['INNERTUBE_CONTEXT'].setdefault('thirdParty', {}).update({
|
||||
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
|
||||
}
|
||||
BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android')
|
||||
})
|
||||
|
||||
|
||||
def build_innertube_clients():
|
||||
# From highest to lowest priority
|
||||
BASE_CLIENTS = ('tv', 'web', 'mweb', 'android', 'ios')
|
||||
priority = qualities(BASE_CLIENTS[::-1])
|
||||
|
||||
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
|
||||
@@ -397,6 +416,7 @@ def build_innertube_clients():
|
||||
ytcfg.setdefault('SUBS_PO_TOKEN_POLICY', SubsPoTokenPolicy())
|
||||
ytcfg.setdefault('REQUIRE_AUTH', False)
|
||||
ytcfg.setdefault('SUPPORTS_COOKIES', False)
|
||||
ytcfg.setdefault('SUPPORTS_AD_PLAYBACK_CONTEXT', False)
|
||||
ytcfg.setdefault('PLAYER_PARAMS', None)
|
||||
ytcfg.setdefault('AUTHENTICATED_USER_AGENT', None)
|
||||
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
|
||||
@@ -405,10 +425,7 @@ def build_innertube_clients():
|
||||
ytcfg['priority'] = 10 * priority(base_client)
|
||||
|
||||
if variant == 'embedded':
|
||||
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
|
||||
ytcfg['priority'] -= 2
|
||||
elif variant:
|
||||
ytcfg['priority'] -= 3
|
||||
_fix_embedded_ytcfg(ytcfg)
|
||||
|
||||
|
||||
build_innertube_clients()
|
||||
@@ -991,6 +1008,10 @@ def _download_ytcfg(self, client, video_id):
|
||||
|
||||
ytcfg = self.extract_ytcfg(video_id, webpage) or {}
|
||||
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/14826
|
||||
if _split_innertube_client(client)[2] == 'embedded':
|
||||
_fix_embedded_ytcfg(ytcfg)
|
||||
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/12563
|
||||
# But it's not effective when logged-in
|
||||
if client == 'tv' and not self.is_authenticated:
|
||||
|
||||
@@ -340,8 +340,9 @@ def _extract_lockup_view_model(self, view_model):
|
||||
thumbnails=self._extract_thumbnails(view_model, (
|
||||
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
|
||||
duration=traverse_obj(view_model, (
|
||||
'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
|
||||
'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
|
||||
'contentImage', 'thumbnailViewModel', 'overlays', ...,
|
||||
(('thumbnailBottomOverlayViewModel', 'badges'), ('thumbnailOverlayBadgeViewModel', 'thumbnailBadges')),
|
||||
..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
|
||||
timestamp=(traverse_obj(view_model, (
|
||||
'metadata', 'lockupMetadataViewModel', 'metadata', 'contentMetadataViewModel', 'metadataRows',
|
||||
..., 'metadataParts', ..., 'text', 'content', {lambda t: self._parse_time_text(t, report_failure=False)}, any))
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
132
yt_dlp/extractor/youtube/jsc/README.md
Normal file
132
yt_dlp/extractor/youtube/jsc/README.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# YoutubeIE JS Challenge Provider Framework
|
||||
|
||||
As part of the YouTube extractor, we have a framework for solving n/sig JS Challenges programmatically. This can be used by plugins.
|
||||
|
||||
> [!TIP]
|
||||
> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it.
|
||||
|
||||
|
||||
## Public APIs
|
||||
|
||||
- `yt_dlp.extractor.youtube.jsc.provider`
|
||||
|
||||
Everything else is **internal-only** and no guarantees are made about the API stability.
|
||||
|
||||
> [!WARNING]
|
||||
> We will try our best to maintain stability with the public APIs.
|
||||
> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
|
||||
> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
|
||||
|
||||
## JS Challenge Provider
|
||||
|
||||
`yt_dlp.extractor.youtube.jsc.provider`
|
||||
|
||||
```python
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
register_provider,
|
||||
register_preference,
|
||||
JsChallengeProvider,
|
||||
JsChallengeRequest,
|
||||
JsChallengeResponse,
|
||||
JsChallengeProviderError,
|
||||
JsChallengeProviderRejectedRequest,
|
||||
JsChallengeType,
|
||||
JsChallengeProviderResponse,
|
||||
NChallengeOutput,
|
||||
)
|
||||
from yt_dlp.utils import traverse_obj, Popen
|
||||
import json
|
||||
import subprocess
|
||||
import typing
|
||||
|
||||
@register_provider
|
||||
class MyJsChallengeProviderJCP(JsChallengeProvider): # Provider class name must end with "JCP"
|
||||
PROVIDER_VERSION = '0.2.1'
|
||||
# Define a unique display name for the provider
|
||||
PROVIDER_NAME = 'my-provider'
|
||||
BUG_REPORT_LOCATION = 'https://issues.example.com/report'
|
||||
|
||||
# Set supported challenge types.
|
||||
# If None, the provider will handle all types.
|
||||
_SUPPORTED_TYPES = [JsChallengeType.N]
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""
|
||||
Check if the provider is available (e.g. all required dependencies are available)
|
||||
This is used to determine if the provider should be used and to provide debug information.
|
||||
|
||||
IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
|
||||
|
||||
Since this is called multiple times, we recommend caching the result.
|
||||
"""
|
||||
return True
|
||||
|
||||
def close(self):
|
||||
# Optional close hook, called when YoutubeDL is closed.
|
||||
pass
|
||||
|
||||
def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
|
||||
# ℹ️ If you need to do additional validation on the requests.
|
||||
# Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported.
|
||||
if len("something") > 255:
|
||||
raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True)
|
||||
|
||||
|
||||
# ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubejsc-<PROVIDER_KEY>`.
|
||||
# For this example, the extractor arg would be:
|
||||
# `--extractor-args "youtubejsc-myjschallengeprovider:bin_path=/path/to/bin"`
|
||||
bin_path = self._configuration_arg(
|
||||
'bin_path', default=['/path/to/bin'])[0]
|
||||
|
||||
# See below for logging guidelines
|
||||
self.logger.trace(f'Using bin path: {bin_path}')
|
||||
|
||||
for request in requests:
|
||||
# You can use the _get_player method to get the player JS code if needed.
|
||||
# This shares the same caching as the YouTube extractor, so it will not make unnecessary requests.
|
||||
player_js = self._get_player(request.video_id, request.input.player_url)
|
||||
cmd = f'{bin_path} {request.input.challenges} {player_js}'
|
||||
self.logger.info(f'Executing command: {cmd}')
|
||||
stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
|
||||
if ret != 0:
|
||||
# ℹ️ If there is an error, raise JsChallengeProviderError.
|
||||
# The request will be sent to the next provider if there is one.
|
||||
# You can specify whether it is expected or not. If it is unexpected,
|
||||
# the log will include a link to the bug report location (BUG_REPORT_LOCATION).
|
||||
|
||||
# raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
|
||||
|
||||
# You can also only fail this specific request by returning a JsChallengeProviderResponse with the error.
|
||||
# This will allow other requests to be processed by this provider.
|
||||
yield JsChallengeProviderResponse(
|
||||
request=request,
|
||||
error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
|
||||
)
|
||||
|
||||
yield JsChallengeProviderResponse(
|
||||
request=request,
|
||||
response=JsChallengeResponse(
|
||||
type=JsChallengeType.N,
|
||||
output=NChallengeOutput(results=traverse_obj(json.loads(stdout))),
|
||||
))
|
||||
|
||||
|
||||
# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s),
|
||||
# you can define a preference function to increase/decrease the priority of providers.
|
||||
|
||||
@register_preference(MyJsChallengeProviderJCP)
|
||||
def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
|
||||
return 50
|
||||
```
|
||||
|
||||
## Logging Guidelines
|
||||
|
||||
- Use the `self.logger` object to log messages.
|
||||
- When making HTTP requests or any other time-expensive operation, use `self.logger.info` to log a message to standard non-verbose output.
|
||||
- This lets users know what is happening when a time-expensive operation is taking place.
|
||||
- Technical information such as a command being executed should be logged to `self.logger.debug`
|
||||
- Use `self.logger.trace` for very detailed information that is only useful for debugging to avoid cluttering the debug log.
|
||||
|
||||
## Debugging
|
||||
|
||||
- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output.
|
||||
5
yt_dlp/extractor/youtube/jsc/__init__.py
Normal file
5
yt_dlp/extractor/youtube/jsc/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# Trigger import of built-in providers
|
||||
from ._builtin.bun import BunJCP as _BunJCP # noqa: F401
|
||||
from ._builtin.deno import DenoJCP as _DenoJCP # noqa: F401
|
||||
from ._builtin.node import NodeJCP as _NodeJCP # noqa: F401
|
||||
from ._builtin.quickjs import QuickJSJCP as _QuickJSJCP # noqa: F401
|
||||
0
yt_dlp/extractor/youtube/jsc/_builtin/__init__.py
Normal file
0
yt_dlp/extractor/youtube/jsc/_builtin/__init__.py
Normal file
150
yt_dlp/extractor/youtube/jsc/_builtin/bun.py
Normal file
150
yt_dlp/extractor/youtube/jsc/_builtin/bun.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.ejs import (
|
||||
_EJS_WIKI_URL,
|
||||
EJSBaseJCP,
|
||||
Script,
|
||||
ScriptSource,
|
||||
ScriptType,
|
||||
ScriptVariant,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeProvider,
|
||||
JsChallengeProviderError,
|
||||
JsChallengeRequest,
|
||||
register_preference,
|
||||
register_provider,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||||
from yt_dlp.utils import Popen
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies
|
||||
|
||||
# KNOWN ISSUES:
|
||||
# - If node_modules is present and includes a requested lib, the version we request is ignored
|
||||
# and whatever installed in node_modules is used.
|
||||
# - No way to ignore existing node_modules, lock files, etc.
|
||||
# - No sandboxing options available
|
||||
# - Cannot detect if npm packages are cached without potentially downloading them.
|
||||
# `--no-install` appears to disable the cache.
|
||||
# - npm auto-install may fail with an integrity error when using HTTP proxies
|
||||
# - npm auto-install HTTP proxy support may be limited on older Bun versions
|
||||
# - Cannot disable the transpiler / specify lang for stdin
|
||||
|
||||
|
||||
@register_provider
|
||||
class BunJCP(EJSBaseJCP, BuiltinIEContentProvider):
|
||||
PROVIDER_NAME = 'bun'
|
||||
JS_RUNTIME_NAME = 'bun'
|
||||
BUN_NPM_LIB_FILENAME = 'yt.solver.bun.lib.js'
|
||||
SUPPORTED_PROXY_SCHEMES = ['http', 'https']
|
||||
|
||||
def _iter_script_sources(self):
|
||||
yield from super()._iter_script_sources()
|
||||
yield ScriptSource.BUILTIN, self._bun_npm_source
|
||||
|
||||
def _bun_npm_source(self, script_type: ScriptType, /):
|
||||
if script_type != ScriptType.LIB:
|
||||
return None
|
||||
if 'ejs:npm' not in self.ie.get_param('remote_components', []):
|
||||
return self._skip_component('ejs:npm')
|
||||
|
||||
# Check to see if the environment proxies are compatible with Bun npm source
|
||||
if unsupported_scheme := self._check_env_proxies(self._get_env_options()):
|
||||
self.logger.warning(
|
||||
f'Bun NPM package downloads only support HTTP/HTTPS proxies; skipping remote NPM package downloads. '
|
||||
f'Provide another distribution of the challenge solver script or use '
|
||||
f'another JS runtime that supports "{unsupported_scheme}" proxies. '
|
||||
f'For more information and alternatives, refer to {_EJS_WIKI_URL}')
|
||||
return None
|
||||
|
||||
# Bun-specific lib scripts that uses Bun autoimport
|
||||
# https://bun.com/docs/runtime/autoimport
|
||||
error_hook = lambda e: self.logger.warning(
|
||||
f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}')
|
||||
code = load_script(
|
||||
self.BUN_NPM_LIB_FILENAME, error_hook=error_hook)
|
||||
if code:
|
||||
return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)
|
||||
return None
|
||||
|
||||
def _check_env_proxies(self, env):
|
||||
# check that the schemes of both HTTP_PROXY and HTTPS_PROXY are supported
|
||||
for key in ('HTTP_PROXY', 'HTTPS_PROXY'):
|
||||
proxy = env.get(key)
|
||||
if not proxy:
|
||||
continue
|
||||
scheme = urllib.parse.urlparse(proxy).scheme.lower()
|
||||
if scheme not in self.SUPPORTED_PROXY_SCHEMES:
|
||||
return scheme
|
||||
return None
|
||||
|
||||
def _get_env_options(self) -> dict[str, str]:
    """Build the environment mapping for the bun subprocess.

    Starts from a copy of os.environ, layers the downloader's proxy
    configuration on top, then applies TLS and Bun/JSC tuning variables.
    """
    options = os.environ.copy()  # pass through existing bun env vars
    request_proxies = self.ie._downloader.proxies.copy()
    clean_proxies(request_proxies, HTTPHeaderDict())

    # Apply 'all' proxy first, then allow per-scheme overrides
    if request_proxies.get('all') is not None:
        options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all']
    for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY')):
        val = request_proxies.get(key)
        if val is not None:
            options[env] = val
    if self.ie.get_param('nocheckcertificate'):
        # Bun honors the Node-compatible TLS opt-out variable
        options['NODE_TLS_REJECT_UNAUTHORIZED'] = '0'

    # Disable Bun transpiler cache
    options['BUN_RUNTIME_TRANSPILER_CACHE_PATH'] = '0'

    # Prevent segfault: <https://github.com/oven-sh/bun/issues/22901>
    options.pop('JSC_useJIT', None)
    if self.ejs_setting('jitless', ['false']) != ['false']:
        options['BUN_JSC_useJIT'] = '0'

    return options
|
||||
|
||||
def _run_js_runtime(self, stdin: str, /) -> str:
    """Execute the solver program (passed on stdin) with bun; return stdout.

    Raises JsChallengeProviderError on a non-zero exit code or when any
    unexpected stderr output remains after _clean_stderr().
    """
    # https://bun.com/docs/cli/run
    options = ['--no-addons', '--prefer-offline']
    if self._lib_script.variant == ScriptVariant.BUN_NPM:
        # Enable auto-install even if node_modules is present
        options.append('--install=fallback')
    else:
        options.append('--no-install')
    # '-' makes bun read the program from stdin
    cmd = [self.runtime_info.path, '--bun', 'run', *options, '-']
    self.logger.debug(f'Running bun: {shlex.join(cmd)}')

    with Popen(
        cmd,
        text=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=self._get_env_options(),
    ) as proc:
        stdout, stderr = proc.communicate_or_kill(stdin)
        stderr = self._clean_stderr(stderr)
        if proc.returncode or stderr:
            msg = f'Error running bun process (returncode: {proc.returncode})'
            if stderr:
                msg = f'{msg}: {stderr.strip()}'
            raise JsChallengeProviderError(msg)
    return stdout
|
||||
|
||||
def _clean_stderr(self, stderr):
|
||||
return '\n'.join(
|
||||
line for line in stderr.splitlines()
|
||||
if not re.match(r'^Bun v\d+\.\d+\.\d+ \([\w\s]+\)$', line))
|
||||
|
||||
|
||||
@register_preference(BunJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
    # Lowest of the builtin EJS runtime preferences (deno=1000, node=900, bun=800)
    return 800
|
||||
131
yt_dlp/extractor/youtube/jsc/_builtin/deno.py
Normal file
131
yt_dlp/extractor/youtube/jsc/_builtin/deno.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.ejs import (
|
||||
EJSBaseJCP,
|
||||
Script,
|
||||
ScriptSource,
|
||||
ScriptType,
|
||||
ScriptVariant,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeProvider,
|
||||
JsChallengeProviderError,
|
||||
JsChallengeRequest,
|
||||
register_preference,
|
||||
register_provider,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||||
from yt_dlp.utils import Popen, remove_terminal_sequences
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies
|
||||
|
||||
# KNOWN ISSUES:
|
||||
# - Can't avoid analysis cache: https://github.com/yt-dlp/yt-dlp/pull/14849#issuecomment-3475840821
|
||||
|
||||
|
||||
@register_provider
class DenoJCP(EJSBaseJCP, BuiltinIEContentProvider):
    """EJS challenge solver provider backed by the Deno runtime."""

    PROVIDER_NAME = 'deno'
    JS_RUNTIME_NAME = 'deno'

    # Sandboxing/reproducibility flags passed to every `deno run` invocation
    _DENO_BASE_OPTIONS = [
        '--ext=js', '--no-code-cache', '--no-prompt', '--no-remote',
        '--no-lock', '--node-modules-dir=none', '--no-config',
    ]
    # Vendored lib-script filename that imports its deps as npm: specifiers
    DENO_NPM_LIB_FILENAME = 'yt.solver.deno.lib.js'
    # Instance attribute (assigned in _deno_npm_source) shadows this class default
    _NPM_PACKAGES_CACHED = False

    def _iter_script_sources(self):
        # Extend the base sources with the deno-specific NPM lib source
        yield from super()._iter_script_sources()
        yield ScriptSource.BUILTIN, self._deno_npm_source

    def _deno_npm_source(self, script_type: ScriptType, /):
        """Builtin LIB script source using Deno NPM imports.

        Returns a Script, a _SkippedComponent when 'ejs:npm' is disabled and
        the packages are not already cached, or None when unavailable.
        """
        if script_type != ScriptType.LIB:
            return None
        # Deno-specific lib scripts that use Deno NPM imports
        error_hook = lambda e: self.logger.warning(
            f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}')
        code = load_script(
            self.DENO_NPM_LIB_FILENAME, error_hook=error_hook)
        if not code:
            return None
        if 'ejs:npm' not in self.ie.get_param('remote_components', []):
            # We may still be able to continue if the npm packages are available/cached
            self._NPM_PACKAGES_CACHED = self._npm_packages_cached(code)
            if not self._NPM_PACKAGES_CACHED:
                return self._skip_component('ejs:npm')
        return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)

    def _npm_packages_cached(self, stdin: str) -> bool:
        # Check if npm packages are cached, so we can run without --remote-components ejs:npm
        # A probe run with --cached-only fails iff a package would need downloading.
        self.logger.debug('Checking if npm packages are cached')
        try:
            self._run_deno(stdin, [*self._DENO_BASE_OPTIONS, '--cached-only'])
        except JsChallengeProviderError as e:
            self.logger.trace(f'Deno npm packages not cached: {e}')
            return False
        return True

    def _run_js_runtime(self, stdin: str, /) -> str:
        """Execute the solver program with deno; return its stdout."""
        options = [*self._DENO_BASE_OPTIONS]
        if self._lib_script.variant == ScriptVariant.DENO_NPM and self._NPM_PACKAGES_CACHED:
            # NPM lib with a warm cache: forbid any network download
            options.append('--cached-only')
        elif self._lib_script.variant != ScriptVariant.DENO_NPM:
            # Non-NPM lib scripts need no npm support at all
            options.append('--no-npm')
            options.append('--cached-only')
        if self.ie.get_param('nocheckcertificate'):
            options.append('--unsafely-ignore-certificate-errors')
        # XXX: Convert this extractor-arg into a general option if/when a JSI framework is implemented
        if self.ejs_setting('jitless', ['false']) != ['false']:
            options.append('--v8-flags=--jitless')
        return self._run_deno(stdin, options)

    def _get_env_options(self) -> dict[str, str]:
        """Build the environment mapping for the deno subprocess."""
        options = os.environ.copy()  # pass through existing deno env vars
        request_proxies = self.ie._downloader.proxies.copy()
        clean_proxies(request_proxies, HTTPHeaderDict())
        # Apply 'all' proxy first, then allow per-scheme overrides
        if 'all' in request_proxies and request_proxies['all'] is not None:
            options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all']
        for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY'), ('no', 'NO_PROXY')):
            if key in request_proxies and request_proxies[key] is not None:
                options[env] = request_proxies[key]
        return options

    def _run_deno(self, stdin, options) -> str:
        """Run `deno run` with *options*, feeding *stdin*; return stdout.

        Raises JsChallengeProviderError on non-zero exit or unexpected stderr.
        """
        cmd = [self.runtime_info.path, 'run', *options, '-']
        self.logger.debug(f'Running deno: {shlex.join(cmd)}')
        with Popen(
            cmd,
            text=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=self._get_env_options(),
        ) as proc:
            stdout, stderr = proc.communicate_or_kill(stdin)
            stderr = self._clean_stderr(stderr)
            if proc.returncode or stderr:
                msg = f'Error running deno process (returncode: {proc.returncode})'
                if stderr:
                    msg = f'{msg}: {stderr.strip()}'
                raise JsChallengeProviderError(msg)
        return stdout

    def _clean_stderr(self, stderr):
        # Drop deno's own progress/warning noise ("Download <url>" lines and the
        # TLS-disabled banner) so only genuine errors trigger a failure
        return '\n'.join(
            line for line in stderr.splitlines()
            if not (
                re.match(r'^Download\s+https\S+$', remove_terminal_sequences(line))
                or re.match(r'DANGER: TLS certificate validation is disabled for all hostnames', remove_terminal_sequences(line))))
|
||||
|
||||
|
||||
@register_preference(DenoJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
    # Highest of the builtin EJS runtime preferences (deno=1000, node=900, bun=800)
    return 1000
|
||||
328
yt_dlp/extractor/youtube/jsc/_builtin/ejs.py
Normal file
328
yt_dlp/extractor/youtube/jsc/_builtin/ejs.py
Normal file
@@ -0,0 +1,328 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import dataclasses
|
||||
import enum
|
||||
import functools
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
from yt_dlp.dependencies import yt_dlp_ejs as _has_ejs
|
||||
from yt_dlp.extractor.youtube.jsc._builtin import vendor
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeProvider,
|
||||
JsChallengeProviderError,
|
||||
JsChallengeProviderRejectedRequest,
|
||||
JsChallengeProviderResponse,
|
||||
JsChallengeResponse,
|
||||
JsChallengeType,
|
||||
NChallengeOutput,
|
||||
SigChallengeOutput,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._provider import configuration_arg
|
||||
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
|
||||
from yt_dlp.utils import version_tuple
|
||||
from yt_dlp.utils._jsruntime import JsRuntimeInfo
|
||||
|
||||
if _has_ejs:
|
||||
import yt_dlp_ejs.yt.solver
|
||||
|
||||
TYPE_CHECKING = False
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable, Generator
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest
|
||||
|
||||
_EJS_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/EJS'
|
||||
|
||||
|
||||
class ScriptType(enum.Enum):
    """Which of the two challenge solver scripts a value refers to."""
    LIB = 'lib'    # dependency bundle, injected ahead of the core script
    CORE = 'core'  # the solver entry point itself
|
||||
|
||||
|
||||
class ScriptVariant(enum.Enum):
    """Build/packaging variant of a solver script."""
    UNKNOWN = 'unknown'
    MINIFIED = 'minified'      # minified release build (python package / GitHub assets)
    UNMINIFIED = 'unminified'  # readable build (vendored builtin)
    DENO_NPM = 'deno_npm'      # lib variant using Deno NPM imports
    BUN_NPM = 'bun_npm'        # lib variant relying on Bun autoimport
|
||||
|
||||
|
||||
class ScriptSource(enum.Enum):
    """Where a solver script was obtained from (in order of preference)."""
    PYPACKAGE = 'python package'  # PyPI, PyInstaller exe, zipimport binary, etc
    CACHE = 'cache'  # GitHub release assets (cached)
    WEB = 'web'  # GitHub release assets (downloaded)
    BUILTIN = 'builtin'  # vendored (full core script; import-only lib script + NPM cache)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Script:
    """A loaded challenge solver script together with its provenance."""
    type: ScriptType
    variant: ScriptVariant
    source: ScriptSource
    version: str
    code: str

    @functools.cached_property
    def hash(self, /) -> str:
        """SHA3-512 hex digest of the script code (computed once, then cached)."""
        digest = hashlib.sha3_512(self.code.encode())
        return digest.hexdigest()

    def __str__(self, /):
        details = (
            f'{self.type.value!r} v{self.version} '
            f'(source: {self.source.value}) variant={self.variant.value!r} '
            f'size={len(self.code)} hash={self.hash[:7]}...'
        )
        return f'<Script {details}>'
|
||||
|
||||
|
||||
class EJSBaseJCP(JsChallengeProvider):
    """Shared base for the builtin EJS challenge solver providers.

    Locates and validates the solver lib/core scripts from several sources,
    builds the JS program fed to the runtime, and parses its JSON output.
    Subclasses implement _run_js_runtime() for their specific runtime.
    """
    # Runtime key in the downloader's _js_runtimes registry; set by subclasses
    JS_RUNTIME_NAME: str
    _CACHE_SECTION = 'challenge-solver'

    # GitHub repository for web script downloads (overridable via 'repo' dev arg)
    _REPOSITORY = 'yt-dlp/ejs'
    _SUPPORTED_TYPES = [JsChallengeType.N, JsChallengeType.SIG]
    _SCRIPT_VERSION = vendor.VERSION
    # TODO: Integration tests for each kind of scripts source
    # Known-good script hashes per type/variant, from the vendored manifest
    _ALLOWED_HASHES = {
        ScriptType.LIB: {
            ScriptVariant.UNMINIFIED: vendor.HASHES['yt.solver.lib.js'],
            ScriptVariant.MINIFIED: vendor.HASHES['yt.solver.lib.min.js'],
            ScriptVariant.DENO_NPM: vendor.HASHES['yt.solver.deno.lib.js'],
            ScriptVariant.BUN_NPM: vendor.HASHES['yt.solver.bun.lib.js'],
        },
        ScriptType.CORE: {
            ScriptVariant.MINIFIED: vendor.HASHES['yt.solver.core.min.js'],
            ScriptVariant.UNMINIFIED: vendor.HASHES['yt.solver.core.js'],
        },
    }

    # Vendored (unminified) filenames used by the builtin source
    _SCRIPT_FILENAMES = {
        ScriptType.LIB: 'yt.solver.lib.js',
        ScriptType.CORE: 'yt.solver.core.js',
    }

    # Minified filenames used for GitHub release asset downloads
    _MIN_SCRIPT_FILENAMES = {
        ScriptType.LIB: 'yt.solver.lib.min.js',
        ScriptType.CORE: 'yt.solver.core.min.js',
    }

    # currently disabled as files are large and we do not support rotation
    _ENABLE_PREPROCESSED_PLAYER_CACHE = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Set to False by _get_script() once no usable script could be found
        self._available = True
        self.ejs_settings = self.ie.get_param('extractor_args', {}).get('youtube-ejs', {})

        # Note: The following 3 args are for developer use only & intentionally not documented.
        # - dev: bypasses verification of script hashes and versions.
        # - repo: use a custom GitHub repository to fetch web script from.
        # - script_version: use a custom script version.
        # E.g. --extractor-args "youtube-ejs:dev=true;script_version=0.1.4"

        self.is_dev = self.ejs_setting('dev', ['false'])[0] == 'true'
        if self.is_dev:
            self.report_dev_option('You have enabled dev mode for EJS JCP Providers.')

        custom_repo = self.ejs_setting('repo', [None])[0]
        if custom_repo:
            self.report_dev_option(f'You have set a custom GitHub repository for EJS JCP Providers ({custom_repo}).')
            self._REPOSITORY = custom_repo

        custom_version = self.ejs_setting('script_version', [None])[0]
        if custom_version:
            self.report_dev_option(f'You have set a custom EJS script version for EJS JCP Providers ({custom_version}).')
            self._SCRIPT_VERSION = custom_version

    def ejs_setting(self, key, *args, **kwargs):
        """Read a 'youtube-ejs' extractor-arg value (list-valued)."""
        return configuration_arg(self.ejs_settings, key, *args, **kwargs)

    def report_dev_option(self, message: str):
        """Warn (once) that an undocumented developer option is in use."""
        self.ie.report_warning(
            f'{message} '
            f'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} open a bug report', only_once=True)

    def _run_js_runtime(self, stdin: str, /) -> str:
        """To be implemented by subclasses"""
        raise NotImplementedError

    def _real_bulk_solve(self, /, requests: list[JsChallengeRequest]):
        """Solve requests grouped per player URL; yield one response each.

        For every distinct player URL: obtain the (possibly cached,
        preprocessed) player, run the solver once over the whole group, and
        map the runtime's JSON responses back onto the requests in order.
        """
        grouped: dict[str, list[JsChallengeRequest]] = collections.defaultdict(list)
        for request in requests:
            grouped[request.input.player_url].append(request)

        for player_url, grouped_requests in grouped.items():
            player = None
            if self._ENABLE_PREPROCESSED_PLAYER_CACHE:
                player = self.ie.cache.load(self._CACHE_SECTION, f'player:{player_url}')

            if player:
                cached = True
            else:
                cached = False
                video_id = next((request.video_id for request in grouped_requests), None)
                player = self._get_player(video_id, player_url)

            # NB: This output belongs after the player request
            self.logger.info(f'Solving JS challenges using {self.JS_RUNTIME_NAME}')

            stdin = self._construct_stdin(player, cached, grouped_requests)
            stdout = self._run_js_runtime(stdin)
            output = json.loads(stdout)
            if output['type'] == 'error':
                # Top-level solver failure: abort the whole group
                raise JsChallengeProviderError(output['error'])

            if self._ENABLE_PREPROCESSED_PLAYER_CACHE and (preprocessed := output.get('preprocessed_player')):
                self.ie.cache.store(self._CACHE_SECTION, f'player:{player_url}', preprocessed)

            # Responses are positional: one per request, in submission order
            for request, response_data in zip(grouped_requests, output['responses'], strict=True):
                if response_data['type'] == 'error':
                    yield JsChallengeProviderResponse(request, None, response_data['error'])
                else:
                    yield JsChallengeProviderResponse(request, JsChallengeResponse(request.type, (
                        NChallengeOutput(response_data['data']) if request.type is JsChallengeType.N
                        else SigChallengeOutput(response_data['data']))))

    def _construct_stdin(self, player: str, preprocessed: bool, requests: list[JsChallengeRequest], /) -> str:
        """Build the JS program to feed the runtime.

        The program is: lib script, its exports promoted to globals, core
        script, then a jsc() call with the JSON payload whose result is
        printed as JSON on stdout.
        """
        json_requests = [{
            'type': request.type.value,
            'challenges': request.input.challenges,
        } for request in requests]
        data = {
            'type': 'preprocessed',
            'preprocessed_player': player,
            'requests': json_requests,
        } if preprocessed else {
            'type': 'player',
            'player': player,
            'requests': json_requests,
            'output_preprocessed': True,
        }
        return f'''\
{self._lib_script.code}
Object.assign(globalThis, lib);
{self._core_script.code}
console.log(JSON.stringify(jsc({json.dumps(data)})));
'''

    # region: challenge solver script

    @functools.cached_property
    def _lib_script(self, /):
        return self._get_script(ScriptType.LIB)

    @functools.cached_property
    def _core_script(self, /):
        return self._get_script(ScriptType.CORE)

    def _get_script(self, script_type: ScriptType, /) -> Script:
        """Return the first usable script of *script_type* from the sources.

        Unless dev mode is enabled, candidates failing the version or hash
        check are rejected (and evicted from cache if cached). Raises
        JsChallengeProviderRejectedRequest when no source yields a usable
        script, recording any components that were skipped as disabled.
        """
        skipped_components: list[_SkippedComponent] = []
        for _, from_source in self._iter_script_sources():
            script = from_source(script_type)
            if not script:
                continue
            if isinstance(script, _SkippedComponent):
                skipped_components.append(script)
                continue
            if not self.is_dev:
                # Matching patch version is expected to have same hash
                if version_tuple(script.version, lenient=True)[:2] != version_tuple(self._SCRIPT_VERSION, lenient=True)[:2]:
                    # NOTE(review): interpolates the enum itself ({script.variant}) while
                    # the debug message below uses .value — cosmetic inconsistency
                    self.logger.warning(
                        f'Challenge solver {script_type.value} script version {script.version} '
                        f'is not supported (source: {script.source.value}, variant: {script.variant}, supported version: {self._SCRIPT_VERSION})')
                    if script.source is ScriptSource.CACHE:
                        self.logger.debug('Clearing outdated cached script')
                        self.ie.cache.store(self._CACHE_SECTION, script_type.value, None)
                    continue
                script_hashes = self._ALLOWED_HASHES[script.type].get(script.variant, [])
                if script_hashes and script.hash not in script_hashes:
                    self.logger.warning(
                        f'Hash mismatch on challenge solver {script.type.value} script '
                        f'(source: {script.source.value}, variant: {script.variant}, hash: {script.hash})!{provider_bug_report_message(self)}')
                    if script.source is ScriptSource.CACHE:
                        self.logger.debug('Clearing invalid cached script')
                        self.ie.cache.store(self._CACHE_SECTION, script_type.value, None)
                    continue
            self.logger.debug(
                f'Using challenge solver {script.type.value} script v{script.version} '
                f'(source: {script.source.value}, variant: {script.variant.value})')
            break

        else:
            # Exhausted every source without a usable script
            self._available = False
            raise JsChallengeProviderRejectedRequest(
                f'No usable challenge solver {script_type.value} script available',
                _skipped_components=skipped_components or None,
            )

        return script

    def _iter_script_sources(self) -> Generator[tuple[ScriptSource, Callable[[ScriptType], Script | None]]]:
        # Sources are tried in this order; subclasses append runtime-specific ones
        yield from [
            (ScriptSource.PYPACKAGE, self._pypackage_source),
            (ScriptSource.CACHE, self._cached_source),
            (ScriptSource.BUILTIN, self._builtin_source),
            (ScriptSource.WEB, self._web_release_source)]

    def _pypackage_source(self, script_type: ScriptType, /) -> Script | None:
        """Load the minified script shipped with the yt-dlp-ejs python package."""
        if not _has_ejs:
            return None
        try:
            code = yt_dlp_ejs.yt.solver.core() if script_type is ScriptType.CORE else yt_dlp_ejs.yt.solver.lib()
        except Exception as e:
            self.logger.warning(
                f'Failed to load challenge solver {script_type.value} script from python package: {e}{provider_bug_report_message(self)}')
            return None
        return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.PYPACKAGE, yt_dlp_ejs.version, code)

    def _cached_source(self, script_type: ScriptType, /) -> Script | None:
        """Load a previously-downloaded script from yt-dlp's cache."""
        if data := self.ie.cache.load(self._CACHE_SECTION, script_type.value):
            return Script(script_type, ScriptVariant(data['variant']), ScriptSource.CACHE, data['version'], data['code'])
        return None

    def _builtin_source(self, script_type: ScriptType, /) -> Script | None:
        """Load the unminified script vendored with yt-dlp."""
        error_hook = lambda _: self.logger.warning(
            f'Failed to read builtin challenge solver {script_type.value} script{provider_bug_report_message(self)}')
        code = vendor.load_script(
            self._SCRIPT_FILENAMES[script_type], error_hook=error_hook)
        if code:
            return Script(script_type, ScriptVariant.UNMINIFIED, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)
        return None

    def _web_release_source(self, script_type: ScriptType, /):
        """Download the minified script from GitHub release assets (and cache it)."""
        if 'ejs:github' not in (self.ie.get_param('remote_components') or ()):
            return self._skip_component('ejs:github')
        url = f'https://github.com/{self._REPOSITORY}/releases/download/{self._SCRIPT_VERSION}/{self._MIN_SCRIPT_FILENAMES[script_type]}'
        if code := self.ie._download_webpage_with_retries(
            url, None, f'[{self.logger.prefix}] Downloading challenge solver {script_type.value} script from {url}',
            f'[{self.logger.prefix}] Failed to download challenge solver {script_type.value} script', fatal=False,
        ):
            self.ie.cache.store(self._CACHE_SECTION, script_type.value, {
                'version': self._SCRIPT_VERSION,
                'variant': ScriptVariant.MINIFIED.value,
                'code': code,
            })
            return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.WEB, self._SCRIPT_VERSION, code)
        return None

    # endregion: challenge solver script

    @property
    def runtime_info(self) -> JsRuntimeInfo | None:
        """Info for this provider's JS runtime, or None if absent/unsupported."""
        runtime = self.ie._downloader._js_runtimes.get(self.JS_RUNTIME_NAME)
        if not runtime or not runtime.info or not runtime.info.supported:
            return None
        return runtime.info

    def is_available(self, /) -> bool:
        if not self.runtime_info:
            return False
        return self._available

    def _skip_component(self, component: str, /):
        # Marker consumed by _get_script() to report disabled remote components
        return _SkippedComponent(component, self.JS_RUNTIME_NAME)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class _SkippedComponent:
    """Marker returned by a script source when a remote component is disabled."""
    # Remote component identifier, e.g. 'ejs:npm' or 'ejs:github'
    component: str
    # JS runtime name of the provider that skipped it
    runtime: str
|
||||
70
yt_dlp/extractor/youtube/jsc/_builtin/node.py
Normal file
70
yt_dlp/extractor/youtube/jsc/_builtin/node.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from yt_dlp.extractor.youtube.jsc._builtin.ejs import EJSBaseJCP
|
||||
from yt_dlp.extractor.youtube.jsc.provider import (
|
||||
JsChallengeProvider,
|
||||
JsChallengeProviderError,
|
||||
JsChallengeRequest,
|
||||
register_preference,
|
||||
register_provider,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.utils import Popen
|
||||
|
||||
|
||||
@register_provider
class NodeJCP(EJSBaseJCP, BuiltinIEContentProvider):
    """EJS challenge solver provider backed by the Node.js runtime."""

    PROVIDER_NAME = 'node'
    JS_RUNTIME_NAME = 'node'

    # Trailing argument: read the program from stdin
    _ARGS = ['-']

    def _run_js_runtime(self, stdin: str, /) -> str:
        """Execute the solver program with node; return its stdout.

        Raises JsChallengeProviderError on non-zero exit or unexpected stderr.
        """
        args = []

        # Optionally disable the V8 JIT via the youtube-ejs "jitless" extractor-arg
        if self.ejs_setting('jitless', ['false']) != ['false']:
            args.append('--v8-flags=--jitless')

        # Node permission flag changed from experimental to stable in v23.5.0
        if self.runtime_info.version_tuple < (23, 5, 0):
            args.append('--experimental-permission')
            args.append('--no-warnings=ExperimentalWarning')
        else:
            args.append('--permission')

        cmd = [self.runtime_info.path, *args, *self._ARGS]
        self.logger.debug(f'Running node: {shlex.join(cmd)}')
        with Popen(
            cmd,
            text=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as proc:
            stdout, stderr = proc.communicate_or_kill(stdin)
            stderr = self._clean_stderr(stderr)
            if proc.returncode or stderr:
                msg = f'Error running node process (returncode: {proc.returncode})'
                if stderr:
                    msg = f'{msg}: {stderr.strip()}'
                raise JsChallengeProviderError(msg)

        return stdout

    def _clean_stderr(self, stderr):
        # Drop node's own noise (stack-trace location lines, the uncaught-exception
        # hint and the version banner) so only genuine errors trigger a failure
        return '\n'.join(
            line for line in stderr.splitlines()
            if not (
                re.match(r'^\[stdin\]:', line)
                or re.match(r'^var jsc', line)
                or '(Use `node --trace-uncaught ...` to show where the exception was thrown)' == line
                or re.match(r'^Node\.js v\d+\.\d+\.\d+$', line)))
|
||||
|
||||
|
||||
@register_preference(NodeJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
    # Middle of the builtin EJS runtime preferences (deno=1000, node=900, bun=800)
    return 900
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user