mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-16 06:05:41 +07:00
Compare commits
32 Commits
2025.11.12
...
017d76edcf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
017d76edcf | ||
|
|
56ea3a00ea | ||
|
|
2a777ecbd5 | ||
|
|
023e4db9af | ||
|
|
4433b3a217 | ||
|
|
419776ecf5 | ||
|
|
2801650268 | ||
|
|
26c2545b87 | ||
|
|
12d411722a | ||
|
|
e564b4a808 | ||
|
|
715af0c636 | ||
|
|
0c696239ef | ||
|
|
3cb5e4db54 | ||
|
|
6842620d56 | ||
|
|
20f83f208e | ||
|
|
c2e7e9cdb2 | ||
|
|
2c9f0c3456 | ||
|
|
0eed3fe530 | ||
|
|
a4c72acc46 | ||
|
|
9daba4f442 | ||
|
|
854fded114 | ||
|
|
5f66ac71f6 | ||
|
|
4cb5e191ef | ||
|
|
6ee6a6fc58 | ||
|
|
23f1ab3469 | ||
|
|
af285016d2 | ||
|
|
1dd84b9d1c | ||
|
|
b333ef1b3f | ||
|
|
4e680db150 | ||
|
|
45a3b42bb9 | ||
|
|
d6aa8c235d | ||
|
|
947e788340 |
18
.github/workflows/build.yml
vendored
18
.github/workflows/build.yml
vendored
@@ -422,23 +422,23 @@ jobs:
|
||||
runner: windows-2025
|
||||
python_version: '3.10'
|
||||
platform_tag: win_amd64
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: b6496c7630c3afe66900cfa824e8234a8c2e2c81704bd7facd79586abc76c0e5
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: e28cc13e4ad0cc74330d832202806d0c1976e9165da6047309348ca663c0ed3d
|
||||
- arch: 'x86'
|
||||
runner: windows-2025
|
||||
python_version: '3.10'
|
||||
platform_tag: win32
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: 2d881843580efdc54f3523507fc6d9c5b6051ee49c743a6d9b7003ac5758c226
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: c00f600c17de3bdd589f043f60ab64fc34fcba6dd902ad973af9c8afc74f80d1
|
||||
- arch: 'arm64'
|
||||
runner: windows-11-arm
|
||||
python_version: '3.13' # arm64 only has Python >= 3.11 available
|
||||
platform_tag: win_arm64
|
||||
pyi_version: '6.16.0'
|
||||
pyi_tag: '2025.09.13.221251'
|
||||
pyi_hash: 4250c9085e34a95c898f3ee2f764914fc36ec59f0d97c28e6a75fcf21f7b144f
|
||||
pyi_version: '6.17.0'
|
||||
pyi_tag: '2025.11.29.054325'
|
||||
pyi_hash: a2033b18b4f7bc6108b5fd76a92c6c1de0a12ec4fe98a23396a9f978cb4b7d7b
|
||||
env:
|
||||
CHANNEL: ${{ inputs.channel }}
|
||||
ORIGIN: ${{ needs.process.outputs.origin }}
|
||||
|
||||
@@ -1870,7 +1870,6 @@ #### youtube
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
|
||||
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
|
||||
* `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
|
||||
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
|
||||
|
||||
#### youtube-ejs
|
||||
|
||||
@@ -69,7 +69,7 @@ build = [
|
||||
"build",
|
||||
"hatchling>=1.27.0",
|
||||
"pip",
|
||||
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
"setuptools>=71.0.2",
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
@@ -86,7 +86,7 @@ test = [
|
||||
"pytest-rerunfailures~=14.0",
|
||||
]
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0
|
||||
"pyinstaller>=6.17.0", # 6.17.0+ needed for compat with setuptools 81+
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
@@ -755,6 +755,17 @@ def test_partial_read_then_full_read(self, handler):
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||
|
||||
def test_partial_read_greater_than_response_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
for encoding in ('', 'gzip', 'deflate'):
|
||||
res = validate_and_send(rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': encoding}))
|
||||
assert res.headers.get('Content-Encoding') == encoding
|
||||
assert res.read(512) == b'<html><video src="/vid.mp4" /></html>'
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b''
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
@@ -920,6 +931,28 @@ def test_http_response_auto_close(self, handler):
|
||||
assert res.fp.fp is None
|
||||
assert res.closed
|
||||
|
||||
def test_data_uri_partial_read_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||
assert res.read(6) == b'hello '
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'world'
|
||||
# Should automatically close the underlying file object
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||
assert res.read(512) == b'hello world'
|
||||
# Response and its underlying file object should already be closed now
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b''
|
||||
assert res.fp.closed
|
||||
assert res.closed
|
||||
|
||||
def test_http_error_returns_content(self, handler):
|
||||
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
||||
def get_response():
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
pytestmark = pytest.mark.handler_flaky(
|
||||
'Websockets',
|
||||
os.name != 'nt' and sys.implementation.name == 'pypy',
|
||||
os.name == 'nt' or sys.implementation.name == 'pypy',
|
||||
reason='segfaults',
|
||||
)
|
||||
|
||||
|
||||
@@ -461,7 +461,8 @@ def download(self, filename, info_dict, subtitle=False):
|
||||
min_sleep_interval = self.params.get('sleep_interval') or 0
|
||||
max_sleep_interval = self.params.get('max_sleep_interval') or 0
|
||||
|
||||
if available_at := info_dict.get('available_at'):
|
||||
requested_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
if available_at := max(f.get('available_at') or 0 for f in requested_formats):
|
||||
forced_sleep_interval = available_at - int(time.time())
|
||||
if forced_sleep_interval > min_sleep_interval:
|
||||
sleep_note = 'as required by the site'
|
||||
|
||||
@@ -457,6 +457,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
# Fixme: This may be wrong when --ffmpeg-location is used
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
|
||||
@@ -75,6 +75,7 @@
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
)
|
||||
from .agalega import AGalegaIE
|
||||
from .agora import (
|
||||
TokFMAuditionIE,
|
||||
TokFMPodcastIE,
|
||||
@@ -268,6 +269,7 @@
|
||||
BitChuteChannelIE,
|
||||
BitChuteIE,
|
||||
)
|
||||
from .bitmovin import BitmovinIE
|
||||
from .blackboardcollaborate import (
|
||||
BlackboardCollaborateIE,
|
||||
BlackboardCollaborateLaunchIE,
|
||||
@@ -690,6 +692,10 @@
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .frontro import (
|
||||
TheChosenGroupIE,
|
||||
TheChosenIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
@@ -1093,7 +1099,10 @@
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mave import MaveIE
|
||||
from .mave import (
|
||||
MaveChannelIE,
|
||||
MaveIE,
|
||||
)
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
@@ -1276,6 +1285,10 @@
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
)
|
||||
from .netapp import (
|
||||
NetAppCollectionIE,
|
||||
NetAppVideoIE,
|
||||
)
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
@@ -1368,6 +1381,7 @@
|
||||
NovaIE,
|
||||
)
|
||||
from .novaplay import NovaPlayIE
|
||||
from .nowcanal import NowCanalIE
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
@@ -2521,6 +2535,7 @@
|
||||
YappyIE,
|
||||
YappyProfileIE,
|
||||
)
|
||||
from .yfanefa import YfanefaIE
|
||||
from .yle_areena import YleAreenaIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import (
|
||||
|
||||
91
yt_dlp/extractor/agalega.py
Normal file
91
yt_dlp/extractor/agalega.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import jwt_decode_hs256, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AGalegaBaseIE(InfoExtractor):
|
||||
_access_token = None
|
||||
|
||||
@staticmethod
|
||||
def _jwt_is_expired(token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 120
|
||||
|
||||
def _refresh_access_token(self, video_id):
|
||||
AGalegaBaseIE._access_token = self._download_json(
|
||||
'https://www.agalega.gal/api/fetch-api/jwt/token', video_id,
|
||||
note='Downloading access token',
|
||||
data=json.dumps({
|
||||
'username': None,
|
||||
'password': None,
|
||||
'client': 'crtvg',
|
||||
'checkExistsCookies': False,
|
||||
}).encode())['access']
|
||||
|
||||
def _call_api(self, endpoint, display_id, note, fatal=True, query=None):
|
||||
if not AGalegaBaseIE._access_token or self._jwt_is_expired(AGalegaBaseIE._access_token):
|
||||
self._refresh_access_token(endpoint)
|
||||
return self._download_json(
|
||||
f'https://api-agalega.interactvty.com/api/2.0/contents/{endpoint}', display_id,
|
||||
note=note, fatal=fatal, query=query,
|
||||
headers={'Authorization': f'jwtok {AGalegaBaseIE._access_token}'})
|
||||
|
||||
|
||||
class AGalegaIE(AGalegaBaseIE):
|
||||
IE_NAME = 'agalega:videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?agalega\.gal/videos/(?:detail/)?(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.agalega.gal/videos/288664-lr-ninguencheconta',
|
||||
'md5': '04533a66c5f863d08dd9724b11d1c223',
|
||||
'info_dict': {
|
||||
'id': '288664',
|
||||
'title': 'Roberto e Ángel Martín atenden consultas dos espectadores',
|
||||
'description': 'O cómico ademais fai un repaso dalgúns momentos da súa traxectoria profesional',
|
||||
'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/2ef32c3b9f6249d9868fd8f11d389d3d.png',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.agalega.gal/videos/detail/296152-pulso-activo-7',
|
||||
'md5': '26df7fdcf859f38ad92d837279d6b56d',
|
||||
'info_dict': {
|
||||
'id': '296152',
|
||||
'title': 'Pulso activo | 18-11-2025',
|
||||
'description': 'Anxo, Noemí, Silvia e Estrella comparten as sensacións da clase de Eddy.',
|
||||
'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/a6bb7da6c8994b82bf961ac6cad1707b.png',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content_data = self._call_api(
|
||||
f'content/{video_id}/', video_id, note='Downloading content data', fatal=False,
|
||||
query={
|
||||
'optional_fields': 'image,is_premium,short_description,has_subtitle',
|
||||
})
|
||||
resource_data = self._call_api(
|
||||
f'content_resources/{video_id}/', video_id, note='Downloading resource data',
|
||||
query={
|
||||
'optional_fields': 'media_url',
|
||||
})
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for m3u8_url in traverse_obj(resource_data, ('results', ..., 'media_url', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls')
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('description', 'short_description'), {str}, any),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
74
yt_dlp/extractor/bitmovin.py
Normal file
74
yt_dlp/extractor/bitmovin.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BitmovinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://streams\.bitmovin\.com/(?P<id>\w+)'
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//streams\.bitmovin\.com/(?P<id>\w+)[^"\']+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/embed',
|
||||
'info_dict': {
|
||||
'id': 'cqkl1t5giv3lrce7pjbg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developing Osteopathic Residents as Faculty',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/share',
|
||||
'info_dict': {
|
||||
'id': 'cgl9rh94uvs51rqc8jhg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny (Streams Docs)',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# bitmovin-stream web component
|
||||
'url': 'https://www.institutionalinvestor.com/article/2bsw1in1l9k68mp9kritc/video-war-stories-over-board-games/best-case-i-get-fired-war-stories',
|
||||
'info_dict': {
|
||||
'id': 'cuiumeil6g115lc4li3g',
|
||||
'ext': 'mp4',
|
||||
'title': '[media] War Stories over Board Games: “Best Case: I Get Fired” ',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cuiumeil6g115lc4li3g/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'https://www.clearblueionizer.com/en/pool-ionizers/mineral-pool-vs-saltwater-pool/',
|
||||
'info_dict': {
|
||||
'id': 'cvpvfsm1pf7itg7cfvtg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pool Ionizer vs. Salt Chlorinator',
|
||||
'thumbnail': 'https://streams.bitmovin.com/cvpvfsm1pf7itg7cfvtg/poster',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for stream_id in re.findall(r'<bitmovin-stream\b[^>]*\bstream-id=["\'](?P<id>\w+)', webpage):
|
||||
yield f'https://streams.bitmovin.com/{stream_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_config = self._download_json(
|
||||
f'https://streams.bitmovin.com/{video_id}/config', video_id)['sources']
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
player_config['hls'], video_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(player_config, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('poster', {str}),
|
||||
}),
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import int_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DigitekaIE(InfoExtractor):
|
||||
@@ -25,74 +26,56 @@ class DigitekaIE(InfoExtractor):
|
||||
)/(?P<id>[\d+a-z]+)'''
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
|
||||
_TESTS = [{
|
||||
# news
|
||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
|
||||
'md5': '276a0e49de58c7e85d32b057837952a2',
|
||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/3x5x55k',
|
||||
'info_dict': {
|
||||
'id': 's8uk0r',
|
||||
'id': '3x5x55k',
|
||||
'ext': 'mp4',
|
||||
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
||||
'title': 'Il est passionné de DS',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 74,
|
||||
'upload_date': '20150317',
|
||||
'timestamp': 1426604939,
|
||||
'uploader_id': '3fszv',
|
||||
'duration': 89,
|
||||
'upload_date': '20251012',
|
||||
'timestamp': 1760285363,
|
||||
'uploader_id': '3pz33',
|
||||
},
|
||||
}, {
|
||||
# music
|
||||
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
|
||||
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
||||
'info_dict': {
|
||||
'id': 'xvpfp8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Two - C\'est La Vie (clip)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 233,
|
||||
'upload_date': '20150224',
|
||||
'timestamp': 1424760500,
|
||||
'uploader_id': '3rfzk',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
|
||||
'only_matching': True,
|
||||
'params': {'skip_download': True},
|
||||
}]
|
||||
_IFRAME_MD_ID = '01836272' # One static ID working for Ultimedia iframes
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('embed_type') or mobj.group('site_type')
|
||||
if video_type == 'music':
|
||||
video_type = 'musique'
|
||||
video_id = self._match_id(url)
|
||||
|
||||
deliver_info = self._download_json(
|
||||
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
||||
video_id)
|
||||
|
||||
yt_id = deliver_info.get('yt_id')
|
||||
if yt_id:
|
||||
return self.url_result(yt_id, 'Youtube')
|
||||
|
||||
jwconf = deliver_info['jwconf']
|
||||
video_info = self._download_json(
|
||||
f'https://www.ultimedia.com/player/getConf/{self._IFRAME_MD_ID}/1/{video_id}', video_id,
|
||||
note='Downloading player configuration')['video']
|
||||
|
||||
formats = []
|
||||
for source in jwconf['playlist'][0]['sources']:
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'format_id': source.get('label'),
|
||||
})
|
||||
subtitles = {}
|
||||
|
||||
title = deliver_info['title']
|
||||
thumbnail = jwconf.get('image')
|
||||
duration = int_or_none(deliver_info.get('duration'))
|
||||
timestamp = int_or_none(deliver_info.get('release_time'))
|
||||
uploader_id = deliver_info.get('owner_id')
|
||||
if hls_url := traverse_obj(video_info, ('media_sources', 'hls', 'hls_auto', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
for format_id, mp4_url in traverse_obj(video_info, ('media_sources', 'mp4', {dict.items}, ...)):
|
||||
if not mp4_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id.partition('_')[2]),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('creationDate', {int_or_none}),
|
||||
'uploader_id': ('ownerId', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@@ -205,6 +206,9 @@ def _real_extract(self, url):
|
||||
'client_app': 'browser_hls',
|
||||
'ipv6': '',
|
||||
}), headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
# A non-zero 'status' indicates the stream is not live, so check truthiness
|
||||
if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server:
|
||||
raise UserNotLive(video_id=video_id)
|
||||
self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
|
||||
|
||||
ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
|
||||
|
||||
@@ -109,6 +109,17 @@ def _real_extract(self, url):
|
||||
'hls_media_playlist_data': m3u8_data,
|
||||
'hls_aes': hls_aes or None,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
for sub_data in traverse_obj(metadata, ('textTracks', lambda _, v: url_or_none(v['src']))):
|
||||
sub_lang = sub_data.get('language') or 'en'
|
||||
sub_entry = {'url': sub_data['src']}
|
||||
if sub_data.get('generated'):
|
||||
automatic_captions.setdefault(sub_lang, []).append(sub_entry)
|
||||
else:
|
||||
subtitles.setdefault(sub_lang, []).append(sub_entry)
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
@@ -118,6 +129,8 @@ def _real_extract(self, url):
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
|
||||
164
yt_dlp/extractor/frontro.py
Normal file
164
yt_dlp/extractor/frontro.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FrontoBaseIE(InfoExtractor):
|
||||
def _get_auth_headers(self, url):
|
||||
return traverse_obj(self._get_cookies(url), {
|
||||
'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}),
|
||||
})
|
||||
|
||||
|
||||
class FrontroVideoBaseIE(FrontoBaseIE):
|
||||
_CHANNEL_ID = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.frontrow.cc/query', video_id, data=json.dumps({
|
||||
'operationName': 'Video',
|
||||
'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
|
||||
'query': '''query Video($channelID: ID!, $videoID: ID!) {
|
||||
video(ChannelID: $channelID, VideoID: $videoID) {
|
||||
... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
|
||||
}
|
||||
}''',
|
||||
}).encode(), headers={
|
||||
'content-type': 'application/json',
|
||||
**self._get_auth_headers(url),
|
||||
})['data']['video']
|
||||
if not traverse_obj(metadata, 'hasAccess'):
|
||||
self.raise_login_required()
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class FrontroGroupBaseIE(FrontoBaseIE):
|
||||
_CHANNEL_ID = None
|
||||
_VIDEO_EXTRACTOR = None
|
||||
_VIDEO_URL_TMPL = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
|
||||
data=json.dumps({
|
||||
'operationName': 'PaginatedStaticPageContainer',
|
||||
'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
|
||||
'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
|
||||
pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
|
||||
... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
|
||||
id contentItem { ... on ItemVideo { videoItem: item {
|
||||
id
|
||||
}}}
|
||||
}}}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
}).encode(), headers={
|
||||
'content-type': 'application/json',
|
||||
**self._get_auth_headers(url),
|
||||
})['data']['pageContainer']
|
||||
|
||||
entries = []
|
||||
for video_id in traverse_obj(metadata, (
|
||||
'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}),
|
||||
):
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': group_id,
|
||||
'entries': entries,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class TheChosenIE(FrontroVideoBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/video/184683594325',
|
||||
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
||||
'info_dict': {
|
||||
'id': '184683594325',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 3 Episode 2: Two by Two',
|
||||
'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4212,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
|
||||
'timestamp': 1698954546,
|
||||
'upload_date': '20231102',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.thechosen.tv/video/184683596189',
|
||||
'md5': 'd581562f9d29ce82f5b7770415334151',
|
||||
'info_dict': {
|
||||
'id': '184683596189',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 4 Episode 8: Humble',
|
||||
'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 5092,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
|
||||
'timestamp': 1715019474,
|
||||
'upload_date': '20240506',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TheChosenGroupIE(FrontroGroupBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
_VIDEO_EXTRACTOR = TheChosenIE
|
||||
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/group/309237658592',
|
||||
'info_dict': {
|
||||
'id': '309237658592',
|
||||
'title': 'Season 3',
|
||||
'timestamp': 1746203969,
|
||||
'upload_date': '20250502',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
@@ -98,7 +98,7 @@ def _real_extract(self, url):
|
||||
|
||||
formats = []
|
||||
for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
|
||||
stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
|
||||
stream_url = re.sub(r'/playlist_pd\d+\.m3u8', '/playlist.m3u8', stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))
|
||||
|
||||
metadata = self._download_json(
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import re
|
||||
import functools
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@@ -10,15 +12,64 @@
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
|
||||
class MaveBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://api.mave.digital/v1/website'
|
||||
_API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'
|
||||
|
||||
def _load_channel_meta(self, channel_id, display_id):
|
||||
return traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/', display_id,
|
||||
note='Downloading channel metadata'), 'podcast')
|
||||
|
||||
def _load_episode_meta(self, channel_id, episode_code, display_id):
|
||||
return self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}',
|
||||
display_id, note='Downloading episode metadata')
|
||||
|
||||
def _create_entry(self, channel_id, channel_meta, episode_meta):
|
||||
episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))
|
||||
return {
|
||||
'display_id': f'{channel_id}-{episode_code}',
|
||||
'extractor_key': MaveIE.ie_key(),
|
||||
'extractor': MaveIE.IE_NAME,
|
||||
'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
|
||||
'channel_id': channel_id,
|
||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(episode_meta, {
|
||||
'id': ('id', {str}),
|
||||
'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'view_count': ('listenings', {int_or_none}),
|
||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||
'timestamp': ('publish_date', {parse_iso8601}),
|
||||
}),
|
||||
**traverse_obj(channel_meta, {
|
||||
'series_id': ('id', {str}),
|
||||
'series': ('title', {str}),
|
||||
'channel': ('title', {str}),
|
||||
'uploader': ('author', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class MaveIE(MaveBaseIE):
|
||||
IE_NAME = 'mave'
|
||||
_VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ochenlichnoe.mave.digital/ep-25',
|
||||
'md5': 'aa3e513ef588b4366df1520657cbc10c',
|
||||
'info_dict': {
|
||||
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'ochenlichnoe-ep-25',
|
||||
'display_id': 'ochenlichnoe-25',
|
||||
'title': 'Между мной и миром: психология самооценки',
|
||||
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
|
||||
'uploader': 'Самарский университет',
|
||||
@@ -45,7 +96,7 @@ class MaveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'budem-ep-12',
|
||||
'display_id': 'budem-12',
|
||||
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
|
||||
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
|
||||
'uploader': 'Полина Цветкова+Евгения Акопова',
|
||||
@@ -68,40 +119,72 @@ class MaveIE(InfoExtractor):
|
||||
'upload_date': '20241230',
|
||||
},
|
||||
}]
|
||||
_API_BASE_URL = 'https://api.mave.digital/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
|
||||
display_id = f'{channel_id}-{slug}'
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = traverse_obj(
|
||||
self._search_nuxt_json(webpage, display_id),
|
||||
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
|
||||
channel_id, episode_code = self._match_valid_url(url).group(
|
||||
'channel_id', 'episode_code')
|
||||
display_id = f'{channel_id}-{episode_code}'
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, display_id)
|
||||
episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)
|
||||
|
||||
return self._create_entry(channel_id, channel_meta, episode_meta)
|
||||
|
||||
|
||||
class MaveChannelIE(MaveBaseIE):
|
||||
IE_NAME = 'mave:channel'
|
||||
_VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://budem.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'budem',
|
||||
'title': 'Все там будем',
|
||||
'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://ochenlichnoe.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'ochenlichnoe',
|
||||
'title': 'Очень личное',
|
||||
'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'https://geekcity.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'geekcity',
|
||||
'title': 'Мужчины в трико',
|
||||
'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _entries(self, channel_id, channel_meta, page_num):
|
||||
page_data = self._download_json(
|
||||
f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={
|
||||
'view': 'all',
|
||||
'page': page_num + 1,
|
||||
'sort': 'newest',
|
||||
'format': 'all',
|
||||
}, note=f'Downloading page {page_num + 1}')
|
||||
for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])):
|
||||
yield self._create_entry(channel_id, channel_meta, ep)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, channel_id)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'channel_id': channel_id,
|
||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
|
||||
**traverse_obj(data, ('activeEpisodeData', {
|
||||
'url': ('audio', {urljoin(self._API_BASE_URL)}),
|
||||
'id': ('id', {str}),
|
||||
'_type': 'playlist',
|
||||
'id': channel_id,
|
||||
**traverse_obj(channel_meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'view_count': ('listenings', {int_or_none}),
|
||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||
'timestamp': ('publish_date', {parse_iso8601}),
|
||||
})),
|
||||
**traverse_obj(data, ('podcast', 'podcast', {
|
||||
'series_id': ('id', {str}),
|
||||
'series': ('title', {str}),
|
||||
'channel': ('title', {str}),
|
||||
'uploader': ('author', {str}),
|
||||
})),
|
||||
'description': ('description', {str}),
|
||||
}),
|
||||
'entries': InAdvancePagedList(
|
||||
functools.partial(self._entries, channel_id, channel_meta),
|
||||
math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
|
||||
}
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
@@ -30,25 +25,8 @@ class MedalTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 13,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': '10619174',
|
||||
'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
|
||||
'uploader_url': 'https://medal.tv/users/10619174',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 23,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/ugcp/content-thumbnail/.*\.jpg',
|
||||
'tags': ['headshot', 'valorant', '4k', 'clutch', 'mornu'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
@@ -57,12 +35,12 @@ class MedalTVIE(InfoExtractor):
|
||||
'id': '2um24TWdty0NA',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'description': '',
|
||||
'uploader': 'zahl',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': '5156321',
|
||||
'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.png',
|
||||
'uploader_url': 'https://medal.tv/users/5156321',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
@@ -70,91 +48,77 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 9,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# API requires auth
|
||||
'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
|
||||
'md5': '6c6bb6569777fd8b4ef7b33c09de8dcf',
|
||||
'info_dict': {
|
||||
'id': '2WRj40tpY_EU9',
|
||||
'ext': 'mp4',
|
||||
'title': '1v5 clutch',
|
||||
'description': '',
|
||||
'uploader': 'adny',
|
||||
'uploader_id': '6256941',
|
||||
'uploader_url': 'https://medal.tv/users/6256941',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 25,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.jpg',
|
||||
'timestamp': 1612896680,
|
||||
'upload_date': '20210209',
|
||||
},
|
||||
'expected_warnings': ['Video formats are not available through API'],
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||
|
||||
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
|
||||
title = clip['contentTitle']
|
||||
|
||||
source_width = int_or_none(clip.get('sourceWidth'))
|
||||
source_height = int_or_none(clip.get('sourceHeight'))
|
||||
|
||||
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||
|
||||
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||
item_id = item_id or '%dp' % height
|
||||
if item_id not in item_url:
|
||||
return
|
||||
container.append({
|
||||
'url': item_url,
|
||||
id_key: item_id,
|
||||
'width': round(aspect_ratio * height),
|
||||
'height': height,
|
||||
})
|
||||
content_data = self._download_json(
|
||||
f'https://medal.tv/api/content/{video_id}', video_id,
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for k, v in clip.items():
|
||||
if not (v and isinstance(v, str)):
|
||||
continue
|
||||
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||
if not mobj:
|
||||
continue
|
||||
prefix = mobj.group(1)
|
||||
height = int_or_none(mobj.group(2))
|
||||
if prefix == 'contentUrl':
|
||||
add_item(
|
||||
formats, v, height or source_height,
|
||||
item_id=None if height else 'source')
|
||||
elif prefix == 'thumbnail':
|
||||
add_item(thumbnails, v, height, 'id')
|
||||
|
||||
error = clip.get('error')
|
||||
if not formats and error:
|
||||
if error == 404:
|
||||
self.raise_no_formats(
|
||||
'That clip does not exist.',
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'An unknown error occurred ({error}).',
|
||||
video_id=video_id)
|
||||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
|
||||
author_id = str_or_none(author.get('userId'))
|
||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||
if m3u8_url := url_or_none(content_data.get('contentUrlHls')):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
if http_url := url_or_none(content_data.get('contentUrl')):
|
||||
formats.append({
|
||||
'url': http_url,
|
||||
'format_id': 'http-source',
|
||||
'ext': 'mp4',
|
||||
'quality': 1,
|
||||
})
|
||||
formats = [fmt for fmt in formats if 'video/privacy-protected-guest' not in fmt['url']]
|
||||
if not formats:
|
||||
# Fallback, does not require auth
|
||||
self.report_warning('Video formats are not available through API, falling back to social video URL')
|
||||
urlh = self._request_webpage(
|
||||
f'https://medal.tv/api/content/{video_id}/socialVideoUrl', video_id,
|
||||
note='Checking social video URL')
|
||||
formats.append({
|
||||
'url': urlh.url,
|
||||
'format_id': 'social-video',
|
||||
'ext': 'mp4',
|
||||
'quality': -1,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clip.get('contentDescription'),
|
||||
'uploader': author.get('displayName'),
|
||||
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||
'uploader_id': author_id,
|
||||
'uploader_url': author_url,
|
||||
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||
'view_count': int_or_none(clip.get('views')),
|
||||
'like_count': int_or_none(clip.get('likes')),
|
||||
'comment_count': int_or_none(clip.get('comments')),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('contentTitle', {str}),
|
||||
'description': ('contentDescription', {str}),
|
||||
'timestamp': ('created', {int_or_none(scale=1000)}),
|
||||
'duration': ('videoLengthSeconds', {int_or_none}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'uploader': ('poster', 'displayName', {str}),
|
||||
'uploader_id': ('poster', 'userId', {str}),
|
||||
'uploader_url': ('poster', 'userId', {str}, filter, {lambda x: x and f'https://medal.tv/users/{x}'}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
79
yt_dlp/extractor/netapp.py
Normal file
79
yt_dlp/extractor/netapp.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NetAppBaseIE(InfoExtractor):
|
||||
_BC_URL = 'https://players.brightcove.net/6255154784001/default_default/index.html?videoId={}'
|
||||
|
||||
@staticmethod
|
||||
def _parse_metadata(item):
|
||||
return traverse_obj(item, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('createdAt', {parse_iso8601}),
|
||||
})
|
||||
|
||||
|
||||
class NetAppVideoIE(NetAppBaseIE):
|
||||
_VALID_URL = r'https?://media\.netapp\.com/video-detail/(?P<id>[0-9a-f-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://media.netapp.com/video-detail/da25fc01-82ad-5284-95bc-26920200a222/seamless-storage-for-modern-kubernetes-deployments',
|
||||
'info_dict': {
|
||||
'id': '1843620950167202073',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seamless storage for modern Kubernetes deployments',
|
||||
'description': 'md5:1ee39e315243fe71fb90af2796037248',
|
||||
'uploader_id': '6255154784001',
|
||||
'duration': 2159.41,
|
||||
'thumbnail': r're:https://house-fastly-signed-us-east-1-prod\.brightcovecdn\.com/image/.*\.jpg',
|
||||
'tags': 'count:15',
|
||||
'timestamp': 1758213949,
|
||||
'upload_date': '20250918',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://media.netapp.com/video-detail/45593e5d-cf1c-5996-978c-c9081906e69f/unleash-ai-innovation-with-your-data-with-the-netapp-platform',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_uuid = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
f'https://api.media.netapp.com/client/detail/{video_uuid}', video_uuid)
|
||||
|
||||
brightcove_video_id = traverse_obj(metadata, (
|
||||
'sections', lambda _, v: v['type'] == 'Player', 'video', {str}, any, {require('brightcove video id')}))
|
||||
|
||||
video_item = traverse_obj(metadata, ('sections', lambda _, v: v['type'] == 'VideoDetail', any))
|
||||
|
||||
return self.url_result(
|
||||
self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
|
||||
url_transparent=True, **self._parse_metadata(video_item))
|
||||
|
||||
|
||||
class NetAppCollectionIE(NetAppBaseIE):
|
||||
_VALID_URL = r'https?://media\.netapp\.com/collection/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://media.netapp.com/collection/9820e190-f2a6-47ac-9c0a-98e5e64234a4',
|
||||
'info_dict': {
|
||||
'title': 'Featured sessions',
|
||||
'id': '9820e190-f2a6-47ac-9c0a-98e5e64234a4',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
def _entries(self, metadata):
|
||||
for item in traverse_obj(metadata, ('items', lambda _, v: v['brightcoveVideoId'])):
|
||||
brightcove_video_id = item['brightcoveVideoId']
|
||||
yield self.url_result(
|
||||
self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
|
||||
url_transparent=True, **self._parse_metadata(item))
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_uuid = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
f'https://api.media.netapp.com/client/collection/{collection_uuid}', collection_uuid)
|
||||
|
||||
return self.playlist_result(self._entries(metadata), collection_uuid, playlist_title=metadata.get('name'))
|
||||
@@ -23,96 +23,38 @@
|
||||
|
||||
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
|
||||
_API_URL_TEMPLATE = 'https://api.nhkworld.jp/showsapi/v1/{lang}/{content_format}_{page_type}/{m_id}{extra_page}'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
|
||||
|
||||
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
|
||||
content_format = 'video' if is_video else 'audio'
|
||||
content_type = 'clips' if is_clip else 'episodes'
|
||||
if not is_episode:
|
||||
extra_page = f'/{content_format}_{content_type}'
|
||||
page_type = 'programs'
|
||||
else:
|
||||
extra_page = ''
|
||||
page_type = content_type
|
||||
|
||||
return self._download_json(
|
||||
self._API_URL_TEMPLATE % (
|
||||
'v' if is_video else 'r',
|
||||
'clip' if is_clip else 'esd',
|
||||
'episode' if is_episode else 'program',
|
||||
m_id, lang, '/all' if is_video else ''),
|
||||
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
|
||||
|
||||
def _get_api_info(self, refresh=True):
|
||||
if not refresh:
|
||||
return self.cache.load('nhk', 'api_info')
|
||||
|
||||
self.cache.store('nhk', 'api_info', {})
|
||||
movie_player_js = self._download_webpage(
|
||||
'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
|
||||
note='Downloading stream API information')
|
||||
api_info = {
|
||||
'url': self._search_regex(
|
||||
r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
|
||||
'token': self._search_regex(
|
||||
r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
|
||||
}
|
||||
self.cache.store('nhk', 'api_info', api_info)
|
||||
return api_info
|
||||
|
||||
def _extract_stream_info(self, vod_id):
|
||||
for refresh in (False, True):
|
||||
api_info = self._get_api_info(refresh)
|
||||
if not api_info:
|
||||
continue
|
||||
|
||||
api_url = api_info.pop('url')
|
||||
meta = traverse_obj(
|
||||
self._download_json(
|
||||
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
|
||||
**api_info,
|
||||
'type': 'json',
|
||||
'optional_id': vod_id,
|
||||
'active_flg': 1,
|
||||
}), ('meta', 0))
|
||||
stream_url = traverse_obj(
|
||||
meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
|
||||
|
||||
if stream_url:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
|
||||
return {
|
||||
**traverse_obj(meta, {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('publication_date', {unified_timestamp}),
|
||||
'release_timestamp': ('insert_date', {unified_timestamp}),
|
||||
'modified_timestamp': ('update_date', {unified_timestamp}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
raise ExtractorError('Unable to extract stream url')
|
||||
self._API_URL_TEMPLATE.format(
|
||||
lang=lang, content_format=content_format, page_type=page_type,
|
||||
m_id=m_id, extra_page=extra_page),
|
||||
join_nonempty(m_id, lang))
|
||||
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
|
||||
is_video = m_type != 'audio'
|
||||
|
||||
if is_video:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
if fetch_episode:
|
||||
episode = self._call_api(
|
||||
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
|
||||
episode_id, lang, is_video, is_episode=True, is_clip=episode_id[:4] == '9999')
|
||||
|
||||
def get_clean_field(key):
|
||||
return clean_html(episode.get(key + '_clean') or episode.get(key))
|
||||
video_id = join_nonempty('id', 'lang', from_dict=episode)
|
||||
|
||||
title = get_clean_field('sub_title')
|
||||
series = get_clean_field('title')
|
||||
|
||||
thumbnails = []
|
||||
for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
|
||||
img_path = episode.get('image' + s)
|
||||
if not img_path:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': f'{h}p',
|
||||
'height': h,
|
||||
'width': w,
|
||||
'url': 'https://www3.nhk.or.jp' + img_path,
|
||||
})
|
||||
title = episode.get('title')
|
||||
series = traverse_obj(episode, (('video_program', 'audio_program'), any, 'title'))
|
||||
|
||||
episode_name = title
|
||||
if series and title:
|
||||
@@ -125,37 +67,52 @@ def get_clean_field(key):
|
||||
episode_name = None
|
||||
|
||||
info = {
|
||||
'id': episode_id + '-' + lang,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_clean_field('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'series': series,
|
||||
'episode': episode_name,
|
||||
**traverse_obj(episode, {
|
||||
'description': ('description', {str}),
|
||||
'release_timestamp': ('first_broadcasted_at', {unified_timestamp}),
|
||||
'categories': ('categories', ..., 'name', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'thumbnails': ('images', lambda _, v: v['url'], {
|
||||
'url': ('url', {urljoin(url)}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'webpage_url': ('url', {urljoin(url)}),
|
||||
}),
|
||||
'extractor_key': NhkVodIE.ie_key(),
|
||||
'extractor': NhkVodIE.IE_NAME,
|
||||
}
|
||||
|
||||
if is_video:
|
||||
vod_id = episode['vod_id']
|
||||
info.update({
|
||||
**self._extract_stream_info(vod_id),
|
||||
'id': vod_id,
|
||||
})
|
||||
|
||||
# XXX: We are assuming that 'video' and 'audio' are mutually exclusive
|
||||
stream_info = traverse_obj(episode, (('video', 'audio'), {dict}, any)) or {}
|
||||
if not stream_info.get('url'):
|
||||
self.raise_no_formats('Stream not found; it has most likely expired', expected=True)
|
||||
else:
|
||||
if fetch_episode:
|
||||
stream_url = stream_info['url']
|
||||
if is_video:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(stream_info, ({
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('published_at', {unified_timestamp}),
|
||||
})),
|
||||
})
|
||||
else:
|
||||
# From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
|
||||
audio_path = remove_end(episode['audio']['audio'], '.m4a')
|
||||
audio_path = remove_end(stream_url, '.m4a')
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': NhkVodIE.ie_key(),
|
||||
'url': url,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
@@ -168,29 +125,29 @@ class NhkVodIE(NhkBaseIE):
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2049165/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
|
||||
'id': '2049165-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
|
||||
'title': 'Japan Railway Journal - Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'description': 'md5:ab57df2fca7f04245148c2e787bb203d',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'episode': 'Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'series': 'Japan Railway Journal',
|
||||
'modified_timestamp': 1707217907,
|
||||
'timestamp': 1681428600,
|
||||
'release_timestamp': 1693883728,
|
||||
'duration': 1679,
|
||||
'upload_date': '20230413',
|
||||
'modified_date': '20240206',
|
||||
'release_date': '20230905',
|
||||
'duration': 1680,
|
||||
'categories': ['Biz & Tech'],
|
||||
'tags': ['Akita', 'Chiba', 'Trains', 'Transcript', 'All (Japan Navigator)'],
|
||||
'timestamp': 1759055880,
|
||||
'upload_date': '20250928',
|
||||
'release_timestamp': 1758810600,
|
||||
'release_date': '20250925',
|
||||
},
|
||||
}, {
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '153c3016dfd252ba09726588149cf0e7',
|
||||
'info_dict': {
|
||||
'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
|
||||
'id': '9999011-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
@@ -198,24 +155,23 @@ class NhkVodIE(NhkBaseIE):
|
||||
'series': 'Dining with the Chef',
|
||||
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'duration': 148,
|
||||
'upload_date': '20190816',
|
||||
'release_date': '20230902',
|
||||
'release_timestamp': 1693619292,
|
||||
'modified_timestamp': 1707217907,
|
||||
'modified_date': '20240206',
|
||||
'timestamp': 1565997540,
|
||||
'categories': ['Food'],
|
||||
'tags': ['Washoku'],
|
||||
'timestamp': 1548212400,
|
||||
'upload_date': '20190123',
|
||||
},
|
||||
}, {
|
||||
# radio
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20240901-1/',
|
||||
'info_dict': {
|
||||
'id': 'livinginjapan-20231001-1-en',
|
||||
'id': 'livinginjapan-20240901-1-en',
|
||||
'ext': 'm4a',
|
||||
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
'title': 'Living in Japan - Weekend Hiking / Self-protection from crime',
|
||||
'series': 'Living in Japan',
|
||||
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
|
||||
'description': 'md5:4d0e14ab73bdbfedb60a53b093954ed6',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
'episode': 'Weekend Hiking / Self-protection from crime',
|
||||
'categories': ['Interactive'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
@@ -256,96 +212,51 @@ class NhkVodIE(NhkBaseIE):
|
||||
},
|
||||
'skip': 'expires 2023-10-15',
|
||||
}, {
|
||||
# a one-off (single-episode series). title from the api is just '<p></p>'
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
|
||||
# a one-off (single-episode series). title from the api is just null
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
|
||||
'id': '3026036-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
|
||||
'description': 'md5:5db620c46a0698451cc59add8816b797',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'release_date': '20230905',
|
||||
'timestamp': 1690103400,
|
||||
'duration': 2939,
|
||||
'release_timestamp': 1693898699,
|
||||
'upload_date': '20230723',
|
||||
'modified_timestamp': 1707217907,
|
||||
'modified_date': '20240206',
|
||||
'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
|
||||
'series': 'Barakan Discovers',
|
||||
'title': 'STATELESS: The Japanese Left Behind in the Philippines',
|
||||
'description': 'md5:9a2fd51cdfa9f52baae28569e0053786',
|
||||
'duration': 2955,
|
||||
'thumbnail': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/images/wide_l_QPtWpt4lzVhm3NzPAMIIF35MCg4CdNwcikPaTS5Q.jpg',
|
||||
'categories': ['Documentary', 'Culture & Lifestyle'],
|
||||
'tags': ['Transcript', 'Documentary 360', 'The Pursuit of PEACE'],
|
||||
'timestamp': 1758931800,
|
||||
'upload_date': '20250927',
|
||||
'release_timestamp': 1758931800,
|
||||
'release_date': '20250927',
|
||||
},
|
||||
}, {
|
||||
# /ondemand/video/ url with alphabetical character in 5th position of id
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
|
||||
'info_dict': {
|
||||
'id': 'nw_c_en_9999-a07',
|
||||
'id': '9999a07-en',
|
||||
'ext': 'mp4',
|
||||
'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
|
||||
'series': 'Mini-Dramas on SDGs',
|
||||
'modified_date': '20240206',
|
||||
'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
|
||||
'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
|
||||
'timestamp': 1621962360,
|
||||
'duration': 189,
|
||||
'release_date': '20230903',
|
||||
'modified_timestamp': 1707217907,
|
||||
'timestamp': 1621911600,
|
||||
'duration': 190,
|
||||
'upload_date': '20210525',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'release_timestamp': 1693713487,
|
||||
'categories': ['Current Affairs', 'Entertainment'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
|
||||
'info_dict': {
|
||||
'id': 'nw_c_en_9999-d17',
|
||||
'id': '9999d17-en',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
|
||||
'description': 'Today’s focus: Snow',
|
||||
'release_timestamp': 1693792402,
|
||||
'release_date': '20230904',
|
||||
'upload_date': '20220128',
|
||||
'timestamp': 1643370960,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 136,
|
||||
'series': '',
|
||||
'modified_date': '20240206',
|
||||
'modified_timestamp': 1707217907,
|
||||
},
|
||||
}, {
|
||||
# new /shows/ url format
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japanology Plus - 20th Anniversary Special Part 1',
|
||||
'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
|
||||
'episode': '20th Anniversary Special Part 1',
|
||||
'series': 'Japanology Plus',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 1680,
|
||||
'timestamp': 1711020600,
|
||||
'upload_date': '20240321',
|
||||
'release_timestamp': 1711022683,
|
||||
'release_date': '20240321',
|
||||
'modified_timestamp': 1711031012,
|
||||
'modified_date': '20240321',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
|
||||
'info_dict': {
|
||||
'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
|
||||
'ext': 'mp4',
|
||||
'title': '100 Ideas to Save the World - Working Styles Evolve',
|
||||
'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
|
||||
'episode': 'Working Styles Evolve',
|
||||
'series': '100 Ideas to Save the World',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'duration': 899,
|
||||
'upload_date': '20230325',
|
||||
'timestamp': 1679755200,
|
||||
'release_date': '20230905',
|
||||
'release_timestamp': 1693880540,
|
||||
'modified_date': '20240206',
|
||||
'modified_timestamp': 1707217907,
|
||||
'categories': ['Culture & Lifestyle', 'Science & Nature'],
|
||||
'tags': ['Nara', 'Temples & Shrines', 'Winter', 'Snow'],
|
||||
'timestamp': 1643339040,
|
||||
'upload_date': '20220128',
|
||||
},
|
||||
}, {
|
||||
# new /shows/audio/ url format
|
||||
@@ -373,6 +284,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'sumo',
|
||||
'title': 'GRAND SUMO Highlights',
|
||||
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
|
||||
'series': 'GRAND SUMO Highlights',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
@@ -381,6 +293,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
|
||||
'series': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
@@ -390,6 +303,7 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
|
||||
'series': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
@@ -399,17 +313,9 @@ class NhkVodProgramIE(NhkBaseIE):
|
||||
'id': 'livinginjapan',
|
||||
'title': 'Living in Japan',
|
||||
'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
|
||||
'series': 'Living in Japan',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
# /tv/ program url
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
|
||||
'info_dict': {
|
||||
'id': 'designtalksplus',
|
||||
'title': 'DESIGN TALKS plus',
|
||||
'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
|
||||
'only_matching': True,
|
||||
@@ -430,9 +336,8 @@ def _real_extract(self, url):
|
||||
program_id, lang, m_type != 'audio', False, episode_type == 'clip')
|
||||
|
||||
def entries():
|
||||
for episode in episodes:
|
||||
if episode_path := episode.get('url'):
|
||||
yield self._extract_episode_info(urljoin(url, episode_path), episode)
|
||||
for episode in traverse_obj(episodes, ('items', lambda _, v: v['url'])):
|
||||
yield self._extract_episode_info(urljoin(url, episode['url']), episode)
|
||||
|
||||
html = self._download_webpage(url, program_id)
|
||||
program_title = self._extract_meta_from_class_elements([
|
||||
@@ -446,7 +351,7 @@ def entries():
|
||||
'tAudioProgramMain__info', # /shows/audio/programs/
|
||||
'p-program-description'], html) # /tv/
|
||||
|
||||
return self.playlist_result(entries(), program_id, program_title, program_description)
|
||||
return self.playlist_result(entries(), program_id, program_title, program_description, series=program_title)
|
||||
|
||||
|
||||
class NhkForSchoolBangumiIE(InfoExtractor):
|
||||
|
||||
37
yt_dlp/extractor/nowcanal.py
Normal file
37
yt_dlp/extractor/nowcanal.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NowCanalIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowcanal\.pt(?:/[\w-]+)+/detalhe/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nowcanal.pt/ultimas/detalhe/pedro-sousa-hjulmand-pode-ter-uma-saida-limpa-do-sporting-daqui-a-um-ano',
|
||||
'md5': '047f17cb783e66e467d703e704bbc95d',
|
||||
'info_dict': {
|
||||
'id': '6376598467112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pedro Sousa «Hjulmand pode ter uma saída limpa do Sporting daqui a um ano»',
|
||||
'description': '',
|
||||
'uploader_id': '6108484330001',
|
||||
'duration': 65.237,
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'timestamp': 1754440620,
|
||||
'upload_date': '20250806',
|
||||
'tags': ['now'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.nowcanal.pt/programas/frente-a-frente/detalhe/frente-a-frente-eva-cruzeiro-ps-e-rita-matias-chega',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BC_URL_TMPL = 'https://players.brightcove.net/6108484330001/chhIqzukMq_default/index.html?videoId={}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_json(
|
||||
r'videoHandler\.addBrightcoveVideoWithJson\(\[',
|
||||
webpage, 'video data', display_id)['brightcoveVideoId']
|
||||
|
||||
return self.url_result(self._BC_URL_TMPL.format(video_id), BrightcoveNewIE)
|
||||
@@ -598,7 +598,8 @@ def _real_extract(self, url):
|
||||
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
|
||||
if not campaign_id:
|
||||
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
|
||||
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))
|
||||
((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
|
||||
'id', {str}, any, {require('campaign ID')}))
|
||||
|
||||
params = {
|
||||
'json-api-use-default-includes': 'false',
|
||||
|
||||
@@ -3,12 +3,14 @@
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RinseFMBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://rinse.fm/api/query/v1'
|
||||
|
||||
@staticmethod
|
||||
def _parse_entry(entry):
|
||||
return {
|
||||
@@ -45,8 +47,10 @@ class RinseFMIE(RinseFMBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
entry = self._download_json(
|
||||
f'{self._API_BASE}/episodes/{display_id}', display_id,
|
||||
note='Downloading episode data from API')['entry']
|
||||
|
||||
return self._parse_entry(entry)
|
||||
|
||||
@@ -58,32 +62,35 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'resources',
|
||||
'title': '[re]sources',
|
||||
'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
|
||||
'description': 'md5:fd6a7254e8273510e6d49fbf50edf392',
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
}, {
|
||||
'url': 'https://rinse.fm/shows/ivy/',
|
||||
'url': 'https://www.rinse.fm/shows/esk',
|
||||
'info_dict': {
|
||||
'id': 'ivy',
|
||||
'title': '[IVY]',
|
||||
'description': 'A dedicated space for DNB/Turbo House and 4x4.',
|
||||
'id': 'esk',
|
||||
'title': 'Esk',
|
||||
'description': 'md5:5893d7c1d411ae8dea7fba12f109aa98',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _entries(self, data):
|
||||
for episode in traverse_obj(data, (
|
||||
'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
):
|
||||
yield self._parse_entry(episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
data = self._search_nextjs_data(webpage, playlist_id)
|
||||
|
||||
api_data = self._download_json(
|
||||
f'{self._API_BASE}/shows/{playlist_id}', playlist_id,
|
||||
note='Downloading show data from API')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), playlist_id, title, description=description)
|
||||
self._entries(api_data), playlist_id,
|
||||
**traverse_obj(api_data, ('entry', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
})))
|
||||
|
||||
@@ -15,14 +15,15 @@ class S4CIE(InfoExtractor):
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.s4c.cymru/clic/programme/856636948',
|
||||
# Geo restricted to the UK
|
||||
'url': 'https://www.s4c.cymru/clic/programme/886303048',
|
||||
'info_dict': {
|
||||
'id': '856636948',
|
||||
'id': '886303048',
|
||||
'ext': 'mp4',
|
||||
'title': 'Am Dro',
|
||||
'title': 'Pennod 1',
|
||||
'description': 'md5:7e3f364b70f61fcdaa8b4cb4a3eb3e7a',
|
||||
'duration': 2880,
|
||||
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
|
||||
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Stad_2025S4C_P1_210053.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -51,7 +52,7 @@ def _real_extract(self, url):
|
||||
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
|
||||
'mode': 'od',
|
||||
'application': 'clic',
|
||||
'region': 'WW',
|
||||
'region': 'UK' if player_config.get('application') == 's4chttpl' else 'WW',
|
||||
'extra': 'false',
|
||||
'thirdParty': 'false',
|
||||
'filename': player_config['filename'],
|
||||
|
||||
@@ -1064,7 +1064,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?:soundcloud(?:%3A|:)playlists(?:%3A|:))?(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.soundcloud.com/playlists/4110309',
|
||||
@@ -1079,6 +1079,12 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud%3Aplaylists%3A1759227795',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud:playlists:2104769627?secret_token=s-wmpCLuExeYX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -101,8 +101,8 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
|
||||
data = self._search_json(
|
||||
r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info',
|
||||
video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||
r'(?:window\.|(?:var|const|let)\s+)(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage,
|
||||
'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||
transform_source=lambda x: base64.b64decode(x).decode())
|
||||
|
||||
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .zype import ZypeIE
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
try_call,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ThisOldHouseIE(InfoExtractor):
|
||||
@@ -77,46 +76,43 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
|
||||
urlh = self._request_webpage(
|
||||
'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
|
||||
errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
|
||||
login_page = self._download_webpage(
|
||||
'https://www.thisoldhouse.com/insider-login', None, 'Downloading login page')
|
||||
hidden_inputs = self._hidden_inputs(login_page)
|
||||
response = self._download_json(
|
||||
'https://www.thisoldhouse.com/wp-admin/admin-ajax.php', None, 'Logging in',
|
||||
headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=urlencode_postdata(filter_dict({
|
||||
'action': 'onebill_subscriber_login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'pricingPlanTerm': hidden_inputs['pricing_plan_term'],
|
||||
'utm_parameters': hidden_inputs.get('utm_parameters'),
|
||||
'nonce': hidden_inputs['mdcr_onebill_login_nonce'],
|
||||
})))
|
||||
|
||||
try:
|
||||
auth_form = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Submitting credentials', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': urlh.url,
|
||||
}, data=json.dumps(filter_dict({
|
||||
**{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
|
||||
'tenant': 'thisoldhouse',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'popup_options': {},
|
||||
'sso': True,
|
||||
'_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
|
||||
'_intstate': 'deprecated',
|
||||
}), separators=(',', ':')).encode())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
message = traverse_obj(response, ('data', 'message', {str}))
|
||||
if not response['success']:
|
||||
if message and 'Something went wrong' in message:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(
|
||||
'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
|
||||
data=urlencode_postdata(self._hidden_inputs(auth_form)))
|
||||
raise ExtractorError(message or 'Login was unsuccessful')
|
||||
if message and 'Your subscription is not active' in message:
|
||||
self.report_warning(
|
||||
f'{self.IE_NAME} said your subscription is not active. '
|
||||
f'If your subscription is active, this could be caused by too many sign-ins, '
|
||||
f'and you should instead try using {self._login_hint(method="cookies")[4:]}')
|
||||
else:
|
||||
self.write_debug(f'{self.IE_NAME} said: {message}')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if 'To Unlock This content' in webpage:
|
||||
self.raise_login_required(
|
||||
'This video is only available for subscribers. '
|
||||
'Note that --cookies-from-browser may not work due to this site using session cookies')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
# If login response says inactive subscription, site redirects to frontpage for Insider content
|
||||
if 'To Unlock This content' in webpage or urllib.parse.urlparse(urlh.url).path in ('', '/'):
|
||||
self.raise_login_required('This video is only available for subscribers')
|
||||
|
||||
video_url, video_id = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||
|
||||
@@ -182,13 +182,13 @@ def _entries(self, show_url, playlist_id, selected_season):
|
||||
webpage = self._download_webpage(show_url, playlist_id)
|
||||
|
||||
data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)['video']
|
||||
r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)['queries'][0]['state']['data']
|
||||
|
||||
# v['number'] is already a decimal string, but stringify to protect against API changes
|
||||
path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]
|
||||
|
||||
for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)):
|
||||
for season in traverse_obj(data, ('seasons', *path)):
|
||||
season_number = int_or_none(season.get('number'))
|
||||
for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
|
||||
episode_id = episode['id']
|
||||
|
||||
@@ -7,15 +7,15 @@
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class URPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
|
||||
'md5': '5ba36643c77cc3d34ffeadad89937d1e',
|
||||
'info_dict': {
|
||||
'id': '203704',
|
||||
'ext': 'mp4',
|
||||
@@ -31,6 +31,7 @@ class URPlayIE(InfoExtractor):
|
||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||
'age_limit': 15,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv',
|
||||
'info_dict': {
|
||||
@@ -49,6 +50,7 @@ class URPlayIE(InfoExtractor):
|
||||
'tags': 'count:7',
|
||||
'episode': 'Mitt barn skadar sig själv',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||
'info_dict': {
|
||||
@@ -68,6 +70,27 @@ class URPlayIE(InfoExtractor):
|
||||
'episode': 'Sovkudde',
|
||||
'season': 'Säsong 1',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Only accessible through new media api
|
||||
'url': 'https://urplay.se/program/242932-vulkanernas-krafter-fran-kraftfull-till-forgorande',
|
||||
'info_dict': {
|
||||
'id': '242932',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanernas krafter : Från kraftfull till förgörande',
|
||||
'description': 'md5:742bb87048e7d5a7f209d28f9bb70ab1',
|
||||
'age_limit': 15,
|
||||
'duration': 2613,
|
||||
'thumbnail': 'https://assets.ur.se/id/242932/images/1_hd.jpg',
|
||||
'categories': ['Vetenskap & teknik'],
|
||||
'tags': ['Geofysik', 'Naturvetenskap', 'Vulkaner', 'Vulkanutbrott'],
|
||||
'series': 'Vulkanernas krafter',
|
||||
'episode': 'Från kraftfull till förgörande',
|
||||
'episode_number': 2,
|
||||
'timestamp': 1763514000,
|
||||
'upload_date': '20251119',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
|
||||
'only_matching': True,
|
||||
@@ -88,21 +111,12 @@ def _real_extract(self, url):
|
||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
||||
episode = urplayer_data['title']
|
||||
|
||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||
formats = []
|
||||
urplayer_streams = urplayer_data.get('streamingInfo', {})
|
||||
|
||||
for k, v in urplayer_streams.get('raw', {}).items():
|
||||
if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)):
|
||||
continue
|
||||
file_http = v.get('location')
|
||||
if file_http:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
f'http://{host}/{file_http}playlist.m3u8',
|
||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||
|
||||
subtitles = {}
|
||||
sources = self._download_json(
|
||||
f'https://media-api.urplay.se/config-streaming/v1/urplay/sources/{video_id}', video_id,
|
||||
note='Downloading streaming information')
|
||||
hls_url = traverse_obj(sources, ('sources', 'hls', {url_or_none}, {require('HLS URL')}))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
def parse_lang_code(code):
|
||||
"3-character language code or None (utils candidate)"
|
||||
|
||||
@@ -339,11 +339,20 @@ class WistiaChannelIE(WistiaBaseIE):
|
||||
'title': 'The Roof S2: The Modern CRO',
|
||||
'thumbnail': r're:https?://embed(?:-ssl)?\.wistia\.com/.+\.(?:jpg|png)',
|
||||
'duration': 86.487,
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season. ',
|
||||
'timestamp': 1619790290,
|
||||
'upload_date': '20210430',
|
||||
},
|
||||
'params': {'noplaylist': True, 'skip_download': True},
|
||||
}, {
|
||||
# Channel with episodes structure instead of videos
|
||||
'url': 'https://fast.wistia.net/embed/channel/sapab9p6qd',
|
||||
'info_dict': {
|
||||
'id': 'sapab9p6qd',
|
||||
'title': 'Credo: An RCIA Program',
|
||||
'description': '\n',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.profitwell.com/recur/boxed-out',
|
||||
@@ -399,8 +408,7 @@ def _real_extract(self, url):
|
||||
|
||||
entries = [
|
||||
self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
|
||||
for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
|
||||
if video.get('hashedId')
|
||||
for video in traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
|
||||
]
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
67
yt_dlp/extractor/yfanefa.py
Normal file
67
yt_dlp/extractor/yfanefa.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YfanefaIE(InfoExtractor):
|
||||
IE_NAME = 'yfanefa'
|
||||
_VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.yfanefa.com/record/2717',
|
||||
'info_dict': {
|
||||
'id': 'record-2717',
|
||||
'ext': 'mp4',
|
||||
'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
|
||||
'duration': 5239,
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.yfanefa.com/news/53',
|
||||
'info_dict': {
|
||||
'id': 'news-53',
|
||||
'ext': 'mp4',
|
||||
'title': 'Memory Bank: Bradford Launch',
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.yfanefa.com/evaluating_nature_matters',
|
||||
'info_dict': {
|
||||
'id': 'evaluating_nature_matters',
|
||||
'ext': 'mp4',
|
||||
'title': 'Evaluating Nature Matters',
|
||||
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_data = self._search_json(
|
||||
r'iwPlayer\.options\["[\w.]+"\]\s*=', webpage, 'player options', video_id)
|
||||
|
||||
formats = []
|
||||
video_url = join_nonempty(player_data['url'], player_data.get('signature'), delim='')
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url, 'ext': 'mp4'}]
|
||||
|
||||
return {
|
||||
'id': video_id.strip('/').replace('/', '-'),
|
||||
'title':
|
||||
self._og_search_title(webpage, default=None)
|
||||
or remove_end(self._html_extract_title(webpage), ' | Yorkshire Film Archive'),
|
||||
'formats': formats,
|
||||
**traverse_obj(player_data, {
|
||||
'thumbnail': ('preview', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
@@ -76,7 +76,7 @@
|
||||
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
|
||||
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
|
||||
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
|
||||
STREAMING_DATA_FETCHED_TIMESTAMP = '__yt_dlp_fetched_timestamp'
|
||||
STREAMING_DATA_AVAILABLE_AT_TIMESTAMP = '__yt_dlp_available_at_timestamp'
|
||||
|
||||
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
|
||||
|
||||
@@ -3032,7 +3032,6 @@ def append_client(*client_names):
|
||||
elif pr:
|
||||
# Save client details for introspection later
|
||||
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
|
||||
fetched_timestamp = int(time.time())
|
||||
sd = pr.setdefault('streamingData', {})
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3040,7 +3039,7 @@ def append_client(*client_names):
|
||||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||||
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||||
sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
|
||||
sd[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP] = self._get_available_at_timestamp(pr, video_id, client)
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3150,6 +3149,9 @@ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url,
|
||||
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
||||
'Use formats=duplicate extractor argument instead')
|
||||
|
||||
def is_super_resolution(f_url):
|
||||
return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...))
|
||||
|
||||
def solve_sig(s, spec):
|
||||
return ''.join(s[i] for i in spec)
|
||||
|
||||
@@ -3169,9 +3171,6 @@ def gvs_pot_required(policy, is_premium_subscriber, has_player_token):
|
||||
# save pots per client to avoid fetching again
|
||||
gvs_pots = {}
|
||||
|
||||
# For handling potential pre-playback required waiting period
|
||||
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
||||
|
||||
def get_language_code_and_preference(fmt_stream):
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
display_name = audio_track.get('displayName') or ''
|
||||
@@ -3196,13 +3195,13 @@ def get_language_code_and_preference(fmt_stream):
|
||||
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||||
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||||
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
|
||||
available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
|
||||
available_at = streaming_data[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP]
|
||||
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
|
||||
|
||||
def get_stream_id(fmt_stream):
|
||||
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||||
|
||||
def process_format_stream(fmt_stream, proto, missing_pot):
|
||||
def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False):
|
||||
itag = str_or_none(fmt_stream.get('itag'))
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
quality = fmt_stream.get('quality')
|
||||
@@ -3253,10 +3252,13 @@ def process_format_stream(fmt_stream, proto, missing_pot):
|
||||
dct = {
|
||||
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||||
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
||||
'format_id': join_nonempty(itag, (
|
||||
'drc' if fmt_stream.get('isDrc')
|
||||
else 'sr' if super_resolution
|
||||
else None)),
|
||||
'format_note': join_nonempty(
|
||||
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
||||
name, fmt_stream.get('isDrc') and 'DRC',
|
||||
name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled',
|
||||
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
||||
@@ -3342,7 +3344,9 @@ def process_https_formats():
|
||||
self.report_warning(msg, video_id, only_once=True)
|
||||
continue
|
||||
|
||||
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
|
||||
fmt = process_format_stream(
|
||||
fmt_stream, proto, missing_pot=require_po_token and not po_token,
|
||||
super_resolution=is_super_resolution(fmt_url))
|
||||
if not fmt:
|
||||
continue
|
||||
|
||||
@@ -3645,6 +3649,36 @@ def _download_initial_webpage(self, webpage_url, webpage_client, video_id):
|
||||
}))
|
||||
return webpage
|
||||
|
||||
def _get_available_at_timestamp(self, player_response, video_id, client):
|
||||
now = time.time()
|
||||
wait_seconds = 0
|
||||
|
||||
for renderer in traverse_obj(player_response, (
|
||||
'adSlots', lambda _, v: v['adSlotRenderer']['adSlotMetadata']['triggerEvent'] == 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT',
|
||||
'adSlotRenderer', 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', 'renderingContent', (
|
||||
None,
|
||||
('playerBytesSequentialLayoutRenderer', 'sequentialLayouts', ..., 'playerBytesAdLayoutRenderer', 'renderingContent'),
|
||||
), 'instreamVideoAdRenderer', {dict},
|
||||
)):
|
||||
duration = traverse_obj(renderer, ('playerVars', {urllib.parse.parse_qs}, 'length_seconds', -1, {int_or_none}))
|
||||
ad = 'an ad' if duration is None else f'a {duration}s ad'
|
||||
|
||||
skip_time = traverse_obj(renderer, ('skipOffsetMilliseconds', {float_or_none(scale=1000)}))
|
||||
if skip_time is not None:
|
||||
# YT allows skipping this ad; use the wait-until-skip time instead of full ad duration
|
||||
skip_time = skip_time if skip_time % 1 else int(skip_time)
|
||||
ad += f' skippable after {skip_time}s'
|
||||
duration = skip_time
|
||||
|
||||
if duration is not None:
|
||||
self.write_debug(f'{video_id}: Detected {ad} for {client}')
|
||||
wait_seconds += duration
|
||||
|
||||
if wait_seconds:
|
||||
return math.ceil(now) + wait_seconds
|
||||
|
||||
return int(now)
|
||||
|
||||
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
|
||||
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
|
||||
is_live = get_first(video_details, 'isLive')
|
||||
@@ -3995,6 +4029,11 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
STREAMING_DATA_CLIENT_NAME: client_name,
|
||||
})
|
||||
|
||||
def set_audio_lang_from_orig_subs_lang(lang_code):
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and not f.get('language'):
|
||||
f['language'] = lang_code
|
||||
|
||||
subtitles = {}
|
||||
skipped_subs_clients = set()
|
||||
|
||||
@@ -4054,7 +4093,8 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
|
||||
orig_lang = qs.get('lang', [None])[-1]
|
||||
lang_name = self._get_text(caption_track, 'name', max_runs=1)
|
||||
if caption_track.get('kind') != 'asr':
|
||||
is_manual_subs = caption_track.get('kind') != 'asr'
|
||||
if is_manual_subs:
|
||||
if not lang_code:
|
||||
continue
|
||||
process_language(
|
||||
@@ -4065,16 +4105,14 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
if not trans_code:
|
||||
continue
|
||||
orig_trans_code = trans_code
|
||||
if caption_track.get('kind') != 'asr' and trans_code != 'und':
|
||||
if is_manual_subs and trans_code != 'und':
|
||||
if not get_translated_subs:
|
||||
continue
|
||||
trans_code += f'-{lang_code}'
|
||||
trans_name += format_field(lang_name, None, ' from %s')
|
||||
if lang_code == f'a-{orig_trans_code}':
|
||||
# Set audio language based on original subtitles
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and not f.get('language'):
|
||||
f['language'] = orig_trans_code
|
||||
set_audio_lang_from_orig_subs_lang(orig_trans_code)
|
||||
# Add an "-orig" label to the original language so that it can be distinguished.
|
||||
# The subs are returned without "-orig" as well for compatibility
|
||||
process_language(
|
||||
@@ -4085,6 +4123,21 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
automatic_captions, base_url, trans_code, trans_name, client_name,
|
||||
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
|
||||
|
||||
# Extract automatic captions when the language is not in 'translationLanguages'
|
||||
# e.g. Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889
|
||||
lang_code = remove_start(lang_code, 'a-')
|
||||
if is_manual_subs or not lang_code or lang_code in automatic_captions:
|
||||
continue
|
||||
lang_name = remove_end(lang_name, ' (auto-generated)')
|
||||
if caption_track.get('isTranslatable'):
|
||||
# We can assume this is the original audio language
|
||||
set_audio_lang_from_orig_subs_lang(lang_code)
|
||||
process_language(
|
||||
automatic_captions, base_url, f'{lang_code}-orig',
|
||||
f'{lang_name} (Original)', client_name, pot_params)
|
||||
process_language(
|
||||
automatic_captions, base_url, lang_code, lang_name, client_name, pot_params)
|
||||
|
||||
# Avoid duplication if we've already got everything we need
|
||||
need_subs_langs.difference_update(subtitles)
|
||||
need_caps_langs.difference_update(automatic_captions)
|
||||
|
||||
@@ -305,6 +305,8 @@ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
|
||||
status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
|
||||
|
||||
def read(self, amt=None):
|
||||
if self.closed:
|
||||
return b''
|
||||
try:
|
||||
data = self.fp.read(amt)
|
||||
underlying = getattr(self.fp, 'fp', None)
|
||||
|
||||
@@ -192,7 +192,10 @@ def _probe_version(self):
|
||||
|
||||
@property
|
||||
def available(self):
|
||||
return bool(self._ffmpeg_location.get()) or self.basename is not None
|
||||
# If we return that ffmpeg is available, then the basename property *must* be run
|
||||
# (as doing so has side effects), and its value can never be None
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/12829
|
||||
return self.basename is not None
|
||||
|
||||
@property
|
||||
def executable(self):
|
||||
|
||||
@@ -1,21 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import dataclasses
|
||||
import functools
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
from ._utils import _get_exe_version_output, detect_exe_version, int_or_none
|
||||
|
||||
|
||||
# NOT public API
|
||||
def runtime_version_tuple(v):
|
||||
def _runtime_version_tuple(v):
|
||||
# NB: will return (0,) if `v` is an invalid version string
|
||||
return tuple(int_or_none(x, default=0) for x in v.split('.'))
|
||||
|
||||
|
||||
_FALLBACK_PATHEXT = ('.COM', '.EXE', '.BAT', '.CMD')
|
||||
|
||||
|
||||
def _find_exe(basename: str) -> str:
|
||||
if os.name != 'nt':
|
||||
return basename
|
||||
|
||||
paths: list[str] = []
|
||||
|
||||
# binary dir
|
||||
if getattr(sys, 'frozen', False):
|
||||
paths.append(os.path.dirname(sys.executable))
|
||||
# cwd
|
||||
paths.append(os.getcwd())
|
||||
# PATH items
|
||||
if path := os.environ.get('PATH'):
|
||||
paths.extend(filter(None, path.split(os.path.pathsep)))
|
||||
|
||||
pathext = os.environ.get('PATHEXT')
|
||||
if pathext is None:
|
||||
exts = _FALLBACK_PATHEXT
|
||||
else:
|
||||
exts = tuple(ext for ext in pathext.split(os.pathsep) if ext)
|
||||
|
||||
visited = []
|
||||
for path in map(os.path.realpath, paths):
|
||||
normed = os.path.normcase(path)
|
||||
if normed in visited:
|
||||
continue
|
||||
visited.append(normed)
|
||||
|
||||
for ext in exts:
|
||||
binary = os.path.join(path, f'{basename}{ext}')
|
||||
if os.access(binary, os.F_OK | os.X_OK) and not os.path.isdir(binary):
|
||||
return binary
|
||||
|
||||
return basename
|
||||
|
||||
|
||||
def _determine_runtime_path(path, basename):
|
||||
if not path:
|
||||
return basename
|
||||
return _find_exe(basename)
|
||||
if os.path.isdir(path):
|
||||
return os.path.join(path, basename)
|
||||
return path
|
||||
@@ -52,7 +92,7 @@ def _info(self):
|
||||
if not out:
|
||||
return None
|
||||
version = detect_exe_version(out, r'^deno (\S+)', 'unknown')
|
||||
vt = runtime_version_tuple(version)
|
||||
vt = _runtime_version_tuple(version)
|
||||
return JsRuntimeInfo(
|
||||
name='deno', path=path, version=version, version_tuple=vt,
|
||||
supported=vt >= self.MIN_SUPPORTED_VERSION)
|
||||
@@ -67,7 +107,7 @@ def _info(self):
|
||||
if not out:
|
||||
return None
|
||||
version = detect_exe_version(out, r'^(\S+)', 'unknown')
|
||||
vt = runtime_version_tuple(version)
|
||||
vt = _runtime_version_tuple(version)
|
||||
return JsRuntimeInfo(
|
||||
name='bun', path=path, version=version, version_tuple=vt,
|
||||
supported=vt >= self.MIN_SUPPORTED_VERSION)
|
||||
@@ -82,7 +122,7 @@ def _info(self):
|
||||
if not out:
|
||||
return None
|
||||
version = detect_exe_version(out, r'^v(\S+)', 'unknown')
|
||||
vt = runtime_version_tuple(version)
|
||||
vt = _runtime_version_tuple(version)
|
||||
return JsRuntimeInfo(
|
||||
name='node', path=path, version=version, version_tuple=vt,
|
||||
supported=vt >= self.MIN_SUPPORTED_VERSION)
|
||||
@@ -100,7 +140,7 @@ def _info(self):
|
||||
is_ng = 'QuickJS-ng' in out
|
||||
|
||||
version = detect_exe_version(out, r'^QuickJS(?:-ng)?\s+version\s+(\S+)', 'unknown')
|
||||
vt = runtime_version_tuple(version.replace('-', '.'))
|
||||
vt = _runtime_version_tuple(version.replace('-', '.'))
|
||||
if is_ng:
|
||||
return JsRuntimeInfo(
|
||||
name='quickjs-ng', path=path, version=version, version_tuple=vt,
|
||||
|
||||
@@ -876,13 +876,19 @@ def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs
|
||||
kwargs.setdefault('encoding', 'utf-8')
|
||||
kwargs.setdefault('errors', 'replace')
|
||||
|
||||
if shell and os.name == 'nt' and kwargs.get('executable') is None:
|
||||
if not isinstance(args, str):
|
||||
args = shell_quote(args, shell=True)
|
||||
shell = False
|
||||
# Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
|
||||
env['='] = '"^\n\n"'
|
||||
args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
|
||||
if os.name == 'nt' and kwargs.get('executable') is None:
|
||||
# Must apply shell escaping if we are trying to run a batch file
|
||||
# These conditions should be very specific to limit impact
|
||||
if not shell and isinstance(args, list) and args and args[0].lower().endswith(('.bat', '.cmd')):
|
||||
shell = True
|
||||
|
||||
if shell:
|
||||
if not isinstance(args, str):
|
||||
args = shell_quote(args, shell=True)
|
||||
shell = False
|
||||
# Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
|
||||
env['='] = '"^\n\n"'
|
||||
args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
|
||||
|
||||
super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user