mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-16 14:15:41 +07:00
Compare commits
15 Commits
2025.11.12
...
0eed3fe530
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0eed3fe530 | ||
|
|
a4c72acc46 | ||
|
|
9daba4f442 | ||
|
|
854fded114 | ||
|
|
5f66ac71f6 | ||
|
|
4cb5e191ef | ||
|
|
6ee6a6fc58 | ||
|
|
23f1ab3469 | ||
|
|
af285016d2 | ||
|
|
1dd84b9d1c | ||
|
|
b333ef1b3f | ||
|
|
4e680db150 | ||
|
|
45a3b42bb9 | ||
|
|
d6aa8c235d | ||
|
|
947e788340 |
@@ -755,6 +755,17 @@ def test_partial_read_then_full_read(self, handler):
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||
|
||||
def test_partial_read_greater_than_response_then_full_read(self, handler):
    """A read(n) larger than the body returns the whole body; later reads are empty."""
    expected_body = b'<html><video src="/vid.mp4" /></html>'
    with handler() as rh:
        for content_encoding in ('', 'gzip', 'deflate'):
            request = Request(
                f'http://127.0.0.1:{self.http_port}/content-encoding',
                headers={'ytdl-encoding': content_encoding})
            response = validate_and_send(rh, request)
            assert response.headers.get('Content-Encoding') == content_encoding
            # 512 is larger than the expected body, so this single read drains it
            assert response.read(512) == expected_body
            assert response.read(0) == b''
            assert response.read() == b''
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||
@@ -920,6 +931,28 @@ def test_http_response_auto_close(self, handler):
|
||||
assert res.fp.fp is None
|
||||
assert res.closed
|
||||
|
||||
def test_data_uri_partial_read_then_full_read(self, handler):
    """Partial read of a data: URI followed by a full read yields the remainder and closes the response."""
    with handler() as rh:
        request = Request('data:text/plain,hello%20world')
        response = validate_and_send(rh, request)
        assert response.read(6) == b'hello '
        assert response.read(0) == b''
        assert response.read() == b'world'
        # Draining the response should automatically close the underlying file object
        assert response.fp.closed
        assert response.closed
|
||||
|
||||
def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
    """An over-long read of a data: URI drains and closes it; further reads return empty bytes."""
    with handler() as rh:
        request = Request('data:text/plain,hello%20world')
        response = validate_and_send(rh, request)
        assert response.read(512) == b'hello world'
        # Response and its underlying file object should already be closed now
        assert response.fp.closed
        assert response.closed
        # Reading from the closed response must still succeed and return nothing
        assert response.read(0) == b''
        assert response.read() == b''
        assert response.fp.closed
        assert response.closed
|
||||
|
||||
def test_http_error_returns_content(self, handler):
|
||||
# urllib HTTPError will try to close the underlying response if the reference to the HTTPError object is lost
|
||||
def get_response():
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
pytestmark = pytest.mark.handler_flaky(
|
||||
'Websockets',
|
||||
os.name != 'nt' and sys.implementation.name == 'pypy',
|
||||
os.name == 'nt' or sys.implementation.name == 'pypy',
|
||||
reason='segfaults',
|
||||
)
|
||||
|
||||
|
||||
@@ -461,7 +461,8 @@ def download(self, filename, info_dict, subtitle=False):
|
||||
min_sleep_interval = self.params.get('sleep_interval') or 0
|
||||
max_sleep_interval = self.params.get('max_sleep_interval') or 0
|
||||
|
||||
if available_at := info_dict.get('available_at'):
|
||||
requested_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
if available_at := max(f.get('available_at') or 0 for f in requested_formats):
|
||||
forced_sleep_interval = available_at - int(time.time())
|
||||
if forced_sleep_interval > min_sleep_interval:
|
||||
sleep_note = 'as required by the site'
|
||||
|
||||
@@ -457,6 +457,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
@classmethod
def available(cls, path=None):
    """Return the ``available`` attribute of a freshly constructed FFmpegPostProcessor.

    The ``path`` argument is accepted but is not consulted by this method.
    """
    # TODO: Fix path for ffmpeg
    # FIXME: This may be wrong when --ffmpeg-location is used
    postprocessor = FFmpegPostProcessor()
    return postprocessor.available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
|
||||
@@ -268,6 +268,7 @@
|
||||
BitChuteChannelIE,
|
||||
BitChuteIE,
|
||||
)
|
||||
from .bitmovin import BitmovinIE
|
||||
from .blackboardcollaborate import (
|
||||
BlackboardCollaborateIE,
|
||||
BlackboardCollaborateLaunchIE,
|
||||
@@ -690,6 +691,10 @@
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .frontro import (
|
||||
TheChosenGroupIE,
|
||||
TheChosenIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
@@ -1093,7 +1098,10 @@
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mave import MaveIE
|
||||
from .mave import (
|
||||
MaveChannelIE,
|
||||
MaveIE,
|
||||
)
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
@@ -1368,6 +1376,7 @@
|
||||
NovaIE,
|
||||
)
|
||||
from .novaplay import NovaPlayIE
|
||||
from .nowcanal import NowCanalIE
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
@@ -2521,6 +2530,7 @@
|
||||
YappyIE,
|
||||
YappyProfileIE,
|
||||
)
|
||||
from .yfanefa import YfanefaIE
|
||||
from .yle_areena import YleAreenaIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import (
|
||||
|
||||
74
yt_dlp/extractor/bitmovin.py
Normal file
74
yt_dlp/extractor/bitmovin.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BitmovinIE(InfoExtractor):
    # Handles streams.bitmovin.com URLs, plus pages embedding them either through
    # an <iframe> or through the <bitmovin-stream> web component.
    _VALID_URL = r'https?://streams\.bitmovin\.com/(?P<id>\w+)'
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//streams\.bitmovin\.com/(?P<id>\w+)[^"\']+)']
    _TESTS = [{
        'url': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/embed',
        'info_dict': {
            'id': 'cqkl1t5giv3lrce7pjbg',
            'ext': 'mp4',
            'title': 'Developing Osteopathic Residents as Faculty',
            'thumbnail': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/poster',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/share',
        'info_dict': {
            'id': 'cgl9rh94uvs51rqc8jhg',
            'ext': 'mp4',
            'title': 'Big Buck Bunny (Streams Docs)',
            'thumbnail': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/poster',
        },
        'params': {'skip_download': 'm3u8'},
    }]
    _WEBPAGE_TESTS = [{
        # bitmovin-stream web component
        'url': 'https://www.institutionalinvestor.com/article/2bsw1in1l9k68mp9kritc/video-war-stories-over-board-games/best-case-i-get-fired-war-stories',
        'info_dict': {
            'id': 'cuiumeil6g115lc4li3g',
            'ext': 'mp4',
            'title': '[media] War Stories over Board Games: “Best Case: I Get Fired” ',
            'thumbnail': 'https://streams.bitmovin.com/cuiumeil6g115lc4li3g/poster',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # iframe embed
        'url': 'https://www.clearblueionizer.com/en/pool-ionizers/mineral-pool-vs-saltwater-pool/',
        'info_dict': {
            'id': 'cvpvfsm1pf7itg7cfvtg',
            'ext': 'mp4',
            'title': 'Pool Ionizer vs. Salt Chlorinator',
            'thumbnail': 'https://streams.bitmovin.com/cvpvfsm1pf7itg7cfvtg/poster',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the parent class's embed URLs first, then one URL per <bitmovin-stream> tag."""
        yield from super()._extract_embed_urls(url, webpage)
        component_re = r'<bitmovin-stream\b[^>]*\bstream-id=["\'](?P<id>\w+)'
        for mobj in re.finditer(component_re, webpage):
            yield f'https://streams.bitmovin.com/{mobj.group("id")}'

    def _real_extract(self, url):
        """Fetch the stream's ``config`` JSON and build the info dict from its ``sources`` object."""
        video_id = self._match_id(url)

        config_url = f'https://streams.bitmovin.com/{video_id}/config'
        sources = self._download_json(config_url, video_id)['sources']

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            sources['hls'], video_id, 'mp4')

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        # Title and poster are read from the same object that carries the HLS URL
        info.update(traverse_obj(sources, {
            'title': ('title', {str}),
            'thumbnail': ('poster', {str}),
        }))
        return info
|
||||
@@ -109,6 +109,17 @@ def _real_extract(self, url):
|
||||
'hls_media_playlist_data': m3u8_data,
|
||||
'hls_aes': hls_aes or None,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
for sub_data in traverse_obj(metadata, ('textTracks', lambda _, v: url_or_none(v['src']))):
|
||||
sub_lang = sub_data.get('language') or 'en'
|
||||
sub_entry = {'url': sub_data['src']}
|
||||
if sub_data.get('generated'):
|
||||
automatic_captions.setdefault(sub_lang, []).append(sub_entry)
|
||||
else:
|
||||
subtitles.setdefault(sub_lang, []).append(sub_entry)
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
@@ -118,6 +129,8 @@ def _real_extract(self, url):
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
|
||||
164
yt_dlp/extractor/frontro.py
Normal file
164
yt_dlp/extractor/frontro.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FrontoBaseIE(InfoExtractor):
    # NOTE(review): spelled "Fronto" while the sibling classes use "Frontro";
    # the name is kept as-is because the subclasses reference it.

    def _get_auth_headers(self, url):
        """Build request headers carrying the ``frAccessToken`` cookie for *url* as a Bearer token.

        The result is spread into a headers dict by callers.
        """
        def as_bearer(token):
            if token:
                return f'Bearer {token}'
            return None

        cookies = self._get_cookies(url)
        return traverse_obj(cookies, {
            'authorization': ('frAccessToken', 'value', {as_bearer}),
        })
|
||||
|
||||
|
||||
class FrontroVideoBaseIE(FrontoBaseIE):
    # Subclasses set this to the channel ID sent with the GraphQL query
    _CHANNEL_ID = None

    def _real_extract(self, url):
        """Query api.frontrow.cc for one video and return its formats and metadata.

        Calls ``raise_login_required`` when the response does not report ``hasAccess``.
        """
        video_id = self._match_id(url)

        # Query text is kept verbatim
        payload = json.dumps({
            'operationName': 'Video',
            'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
            'query': '''query Video($channelID: ID!, $videoID: ID!) {
video(ChannelID: $channelID, VideoID: $videoID) {
... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
}
}''',
        }).encode()
        request_headers = {
            'content-type': 'application/json',
            **self._get_auth_headers(url),
        }
        response = self._download_json(
            'https://api.frontrow.cc/query', video_id, data=payload, headers=request_headers)
        metadata = response['data']['video']

        has_access = traverse_obj(metadata, 'hasAccess')
        if not has_access:
            self.raise_login_required()

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('thumbnail', {url_or_none}),
            'timestamp': ('createdAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'like_count': ('likeCount', {int_or_none}),
            'comment_count': ('comments', {int_or_none}),
            'view_count': ('views', {int_or_none}),
        }))
        return info
|
||||
|
||||
|
||||
class FrontroGroupBaseIE(FrontoBaseIE):
    # Subclasses provide the channel ID, the extractor used for each entry,
    # and a %-style template that turns a video ID into a video page URL.
    _CHANNEL_ID = None
    _VIDEO_EXTRACTOR = None
    _VIDEO_URL_TMPL = None

    def _real_extract(self, url):
        """Query api.frontrow.cc for a page container and return it as a playlist of video URLs."""
        group_id = self._match_id(url)

        # NOTE(review): 'first' is sent as a variable, but the query text below neither
        # declares nor uses $first - confirm whether it is needed.
        # Query text is kept verbatim.
        payload = json.dumps({
            'operationName': 'PaginatedStaticPageContainer',
            'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
            'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
id contentItem { ... on ItemVideo { videoItem: item {
id
}}}
}}}
}
}
}''',
        }).encode()
        request_headers = {
            'content-type': 'application/json',
            **self._get_auth_headers(url),
        }
        response = self._download_json(
            'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
            data=payload, headers=request_headers)
        metadata = response['data']['pageContainer']

        video_ids = traverse_obj(metadata, (
            'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}))
        entries = [
            self.url_result(self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id)
            for video_id in video_ids
        ]

        playlist = {
            '_type': 'playlist',
            'id': group_id,
            'entries': entries,
        }
        playlist.update(traverse_obj(metadata, {
            'title': ('title', {str}),
            'timestamp': ('createdAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
        }))
        return playlist
|
||||
|
||||
|
||||
class TheChosenIE(FrontroVideoBaseIE):
    # Single videos on watch.thechosen.tv; extraction itself is inherited
    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'

    # Channel ID passed to the inherited GraphQL query
    _CHANNEL_ID = '12884901895'

    _TESTS = [{
        'url': 'https://watch.thechosen.tv/video/184683594325',
        'md5': '3f878b689588c71b38ec9943c54ff5b0',
        'info_dict': {
            'id': '184683594325',
            'ext': 'mp4',
            'title': 'Season 3 Episode 2: Two by Two',
            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 4212,
            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
            'timestamp': 1698954546,
            'upload_date': '20231102',
            'modified_timestamp': int,
            'modified_date': str,
        },
    }, {
        'url': 'https://watch.thechosen.tv/video/184683596189',
        'md5': 'd581562f9d29ce82f5b7770415334151',
        'info_dict': {
            'id': '184683596189',
            'ext': 'mp4',
            'title': 'Season 4 Episode 8: Humble',
            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 5092,
            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
            'timestamp': 1715019474,
            'upload_date': '20240506',
            'modified_timestamp': int,
            'modified_date': str,
        },
    }]
|
||||
|
||||
|
||||
class TheChosenGroupIE(FrontroGroupBaseIE):
    # Groups (playlists) on watch.thechosen.tv; extraction itself is inherited
    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'

    # Settings consumed by the inherited playlist extraction
    _CHANNEL_ID = '12884901895'
    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
    _VIDEO_EXTRACTOR = TheChosenIE

    _TESTS = [{
        'url': 'https://watch.thechosen.tv/group/309237658592',
        'info_dict': {
            'id': '309237658592',
            'title': 'Season 3',
            'timestamp': 1746203969,
            'upload_date': '20250502',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_count': 8,
    }]
|
||||
@@ -98,7 +98,7 @@ def _real_extract(self, url):
|
||||
|
||||
formats = []
|
||||
for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
|
||||
stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
|
||||
stream_url = re.sub(r'/playlist_pd\d+\.m3u8', '/playlist.m3u8', stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))
|
||||
|
||||
metadata = self._download_json(
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import re
|
||||
import functools
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@@ -10,15 +12,64 @@
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
|
||||
class MaveBaseIE(InfoExtractor):
    # Base URL for metadata requests
    _API_BASE_URL = 'https://api.mave.digital/v1/website'
    # Base URL that the episode's 'audio' and 'image' values are joined onto
    _API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'

    def _load_channel_meta(self, channel_id, display_id):
        """Download the channel JSON and return its ``podcast`` member."""
        response = self._download_json(
            f'{self._API_BASE_URL}/{channel_id}/', display_id,
            note='Downloading channel metadata')
        return traverse_obj(response, 'podcast')

    def _load_episode_meta(self, channel_id, episode_code, display_id):
        """Download and return the JSON for one episode of the channel."""
        episode_url = f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}'
        return self._download_json(
            episode_url, display_id, note='Downloading episode metadata')

    def _create_entry(self, channel_id, channel_meta, episode_meta):
        """Build an info dict for one episode from its episode and channel metadata."""
        episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))

        def reaction_count(kind):
            # Traversal path selecting the count of the reaction with the given type
            return ('reactions', lambda _, v: v['type'] == kind, 'count', {int_or_none}, any)

        def age_limit_for(is_explicit):
            return 18 if is_explicit else None

        entry = {
            'display_id': f'{channel_id}-{episode_code}',
            'extractor_key': MaveIE.ie_key(),
            'extractor': MaveIE.IE_NAME,
            'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
        }
        # Episode fields are applied first and channel fields second, so the
        # channel values take precedence on any shared key
        entry.update(traverse_obj(episode_meta, {
            'id': ('id', {str}),
            'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
            'title': ('title', {str}),
            'description': ('description', {clean_html}),
            'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
            'duration': ('duration', {int_or_none}),
            'season_number': ('season', {int_or_none}),
            'episode_number': ('number', {int_or_none}),
            'view_count': ('listenings', {int_or_none}),
            'like_count': reaction_count('like'),
            'dislike_count': reaction_count('dislike'),
            'age_limit': ('is_explicit', {bool}, {age_limit_for}),
            'timestamp': ('publish_date', {parse_iso8601}),
        }))
        entry.update(traverse_obj(channel_meta, {
            'series_id': ('id', {str}),
            'series': ('title', {str}),
            'channel': ('title', {str}),
            'uploader': ('author', {str}),
        }))
        return entry
|
||||
|
||||
|
||||
class MaveIE(MaveBaseIE):
|
||||
IE_NAME = 'mave'
|
||||
_VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ochenlichnoe.mave.digital/ep-25',
|
||||
'md5': 'aa3e513ef588b4366df1520657cbc10c',
|
||||
'info_dict': {
|
||||
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'ochenlichnoe-ep-25',
|
||||
'display_id': 'ochenlichnoe-25',
|
||||
'title': 'Между мной и миром: психология самооценки',
|
||||
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
|
||||
'uploader': 'Самарский университет',
|
||||
@@ -45,7 +96,7 @@ class MaveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'budem-ep-12',
|
||||
'display_id': 'budem-12',
|
||||
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
|
||||
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
|
||||
'uploader': 'Полина Цветкова+Евгения Акопова',
|
||||
@@ -68,40 +119,72 @@ class MaveIE(InfoExtractor):
|
||||
'upload_date': '20241230',
|
||||
},
|
||||
}]
|
||||
_API_BASE_URL = 'https://api.mave.digital/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
|
||||
display_id = f'{channel_id}-{slug}'
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = traverse_obj(
|
||||
self._search_nuxt_json(webpage, display_id),
|
||||
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
|
||||
channel_id, episode_code = self._match_valid_url(url).group(
|
||||
'channel_id', 'episode_code')
|
||||
display_id = f'{channel_id}-{episode_code}'
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, display_id)
|
||||
episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)
|
||||
|
||||
return self._create_entry(channel_id, channel_meta, episode_meta)
|
||||
|
||||
|
||||
class MaveChannelIE(MaveBaseIE):
|
||||
IE_NAME = 'mave:channel'
|
||||
_VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://budem.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'budem',
|
||||
'title': 'Все там будем',
|
||||
'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://ochenlichnoe.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'ochenlichnoe',
|
||||
'title': 'Очень личное',
|
||||
'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'https://geekcity.mave.digital/',
|
||||
'info_dict': {
|
||||
'id': 'geekcity',
|
||||
'title': 'Мужчины в трико',
|
||||
'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _entries(self, channel_id, channel_meta, page_num):
    """Yield an entry for every episode on one page that has both ``audio`` and ``id``.

    ``page_num`` is zero-based; the request sends ``page_num + 1``.
    """
    api_page = page_num + 1
    listing_url = f'{self._API_BASE_URL}/{channel_id}/episodes'
    page_data = self._download_json(
        listing_url, channel_id, query={
            'view': 'all',
            'page': api_page,
            'sort': 'newest',
            'format': 'all',
        }, note=f'Downloading page {api_page}')

    episodes = traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id']))
    for episode_meta in episodes:
        yield self._create_entry(channel_id, channel_meta, episode_meta)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_meta = self._load_channel_meta(channel_id, channel_id)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'channel_id': channel_id,
|
||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
|
||||
**traverse_obj(data, ('activeEpisodeData', {
|
||||
'url': ('audio', {urljoin(self._API_BASE_URL)}),
|
||||
'id': ('id', {str}),
|
||||
'_type': 'playlist',
|
||||
'id': channel_id,
|
||||
**traverse_obj(channel_meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'view_count': ('listenings', {int_or_none}),
|
||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||
'timestamp': ('publish_date', {parse_iso8601}),
|
||||
})),
|
||||
**traverse_obj(data, ('podcast', 'podcast', {
|
||||
'series_id': ('id', {str}),
|
||||
'series': ('title', {str}),
|
||||
'channel': ('title', {str}),
|
||||
'uploader': ('author', {str}),
|
||||
})),
|
||||
'description': ('description', {str}),
|
||||
}),
|
||||
'entries': InAdvancePagedList(
|
||||
functools.partial(self._entries, channel_id, channel_meta),
|
||||
math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
|
||||
}
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
@@ -30,25 +25,8 @@ class MedalTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 13,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': '10619174',
|
||||
'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
|
||||
'uploader_url': 'https://medal.tv/users/10619174',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 23,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/ugcp/content-thumbnail/.*\.jpg',
|
||||
'tags': ['headshot', 'valorant', '4k', 'clutch', 'mornu'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
@@ -57,12 +35,12 @@ class MedalTVIE(InfoExtractor):
|
||||
'id': '2um24TWdty0NA',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'description': '',
|
||||
'uploader': 'zahl',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': '5156321',
|
||||
'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.png',
|
||||
'uploader_url': 'https://medal.tv/users/5156321',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
@@ -70,91 +48,77 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 9,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# API requires auth
|
||||
'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
|
||||
'md5': '6c6bb6569777fd8b4ef7b33c09de8dcf',
|
||||
'info_dict': {
|
||||
'id': '2WRj40tpY_EU9',
|
||||
'ext': 'mp4',
|
||||
'title': '1v5 clutch',
|
||||
'description': '',
|
||||
'uploader': 'adny',
|
||||
'uploader_id': '6256941',
|
||||
'uploader_url': 'https://medal.tv/users/6256941',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 25,
|
||||
'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.jpg',
|
||||
'timestamp': 1612896680,
|
||||
'upload_date': '20210209',
|
||||
},
|
||||
'expected_warnings': ['Video formats are not available through API'],
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||
|
||||
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
|
||||
title = clip['contentTitle']
|
||||
|
||||
source_width = int_or_none(clip.get('sourceWidth'))
|
||||
source_height = int_or_none(clip.get('sourceHeight'))
|
||||
|
||||
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||
|
||||
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||
item_id = item_id or '%dp' % height
|
||||
if item_id not in item_url:
|
||||
return
|
||||
container.append({
|
||||
'url': item_url,
|
||||
id_key: item_id,
|
||||
'width': round(aspect_ratio * height),
|
||||
'height': height,
|
||||
})
|
||||
content_data = self._download_json(
|
||||
f'https://medal.tv/api/content/{video_id}', video_id,
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for k, v in clip.items():
|
||||
if not (v and isinstance(v, str)):
|
||||
continue
|
||||
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||
if not mobj:
|
||||
continue
|
||||
prefix = mobj.group(1)
|
||||
height = int_or_none(mobj.group(2))
|
||||
if prefix == 'contentUrl':
|
||||
add_item(
|
||||
formats, v, height or source_height,
|
||||
item_id=None if height else 'source')
|
||||
elif prefix == 'thumbnail':
|
||||
add_item(thumbnails, v, height, 'id')
|
||||
|
||||
error = clip.get('error')
|
||||
if not formats and error:
|
||||
if error == 404:
|
||||
self.raise_no_formats(
|
||||
'That clip does not exist.',
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'An unknown error occurred ({error}).',
|
||||
video_id=video_id)
|
||||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
|
||||
author_id = str_or_none(author.get('userId'))
|
||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||
if m3u8_url := url_or_none(content_data.get('contentUrlHls')):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
if http_url := url_or_none(content_data.get('contentUrl')):
|
||||
formats.append({
|
||||
'url': http_url,
|
||||
'format_id': 'http-source',
|
||||
'ext': 'mp4',
|
||||
'quality': 1,
|
||||
})
|
||||
formats = [fmt for fmt in formats if 'video/privacy-protected-guest' not in fmt['url']]
|
||||
if not formats:
|
||||
# Fallback, does not require auth
|
||||
self.report_warning('Video formats are not available through API, falling back to social video URL')
|
||||
urlh = self._request_webpage(
|
||||
f'https://medal.tv/api/content/{video_id}/socialVideoUrl', video_id,
|
||||
note='Checking social video URL')
|
||||
formats.append({
|
||||
'url': urlh.url,
|
||||
'format_id': 'social-video',
|
||||
'ext': 'mp4',
|
||||
'quality': -1,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clip.get('contentDescription'),
|
||||
'uploader': author.get('displayName'),
|
||||
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||
'uploader_id': author_id,
|
||||
'uploader_url': author_url,
|
||||
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||
'view_count': int_or_none(clip.get('views')),
|
||||
'like_count': int_or_none(clip.get('likes')),
|
||||
'comment_count': int_or_none(clip.get('comments')),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('contentTitle', {str}),
|
||||
'description': ('contentDescription', {str}),
|
||||
'timestamp': ('created', {int_or_none(scale=1000)}),
|
||||
'duration': ('videoLengthSeconds', {int_or_none}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'uploader': ('poster', 'displayName', {str}),
|
||||
'uploader_id': ('poster', 'userId', {str}),
|
||||
'uploader_url': ('poster', 'userId', {str}, filter, {lambda x: x and f'https://medal.tv/users/{x}'}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
37
yt_dlp/extractor/nowcanal.py
Normal file
37
yt_dlp/extractor/nowcanal.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NowCanalIE(InfoExtractor):
    # Handles nowcanal.pt "detalhe" pages; the page embeds a Brightcove video
    # whose ID is read from the inline player setup call, and extraction is
    # then delegated to BrightcoveNewIE.
    _VALID_URL = r'https?://(?:www\.)?nowcanal\.pt(?:/[\w-]+)+/detalhe/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.nowcanal.pt/ultimas/detalhe/pedro-sousa-hjulmand-pode-ter-uma-saida-limpa-do-sporting-daqui-a-um-ano',
        'md5': '047f17cb783e66e467d703e704bbc95d',
        'info_dict': {
            'id': '6376598467112',
            'ext': 'mp4',
            'title': 'Pedro Sousa «Hjulmand pode ter uma saída limpa do Sporting daqui a um ano»',
            'description': '',
            'uploader_id': '6108484330001',
            'duration': 65.237,
            'thumbnail': r're:^https://.+\.jpg',
            'timestamp': 1754440620,
            'upload_date': '20250806',
            'tags': ['now'],
        },
    }, {
        'url': 'https://www.nowcanal.pt/programas/frente-a-frente/detalhe/frente-a-frente-eva-cruzeiro-ps-e-rita-matias-chega',
        'only_matching': True,
    }]

    # Brightcove player page; `{}` is filled with the Brightcove video ID
    _BC_URL_TMPL = 'https://players.brightcove.net/6108484330001/chhIqzukMq_default/index.html?videoId={}'

    def _real_extract(self, url):
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The JSON object is the first element of the array passed to
        # videoHandler.addBrightcoveVideoWithJson([...]) in the page source
        video_data = self._search_json(
            r'videoHandler\.addBrightcoveVideoWithJson\(\[',
            page, 'video data', slug)
        brightcove_id = video_data['brightcoveVideoId']

        player_url = self._BC_URL_TMPL.format(brightcove_id)
        return self.url_result(player_url, BrightcoveNewIE)
|
||||
@@ -3,12 +3,14 @@
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RinseFMBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://rinse.fm/api/query/v1'
|
||||
|
||||
@staticmethod
|
||||
def _parse_entry(entry):
|
||||
return {
|
||||
@@ -45,8 +47,10 @@ class RinseFMIE(RinseFMBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
entry = self._download_json(
|
||||
f'{self._API_BASE}/episodes/{display_id}', display_id,
|
||||
note='Downloading episode data from API')['entry']
|
||||
|
||||
return self._parse_entry(entry)
|
||||
|
||||
@@ -58,32 +62,35 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'resources',
|
||||
'title': '[re]sources',
|
||||
'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
|
||||
'description': 'md5:fd6a7254e8273510e6d49fbf50edf392',
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
}, {
|
||||
'url': 'https://rinse.fm/shows/ivy/',
|
||||
'url': 'https://www.rinse.fm/shows/esk',
|
||||
'info_dict': {
|
||||
'id': 'ivy',
|
||||
'title': '[IVY]',
|
||||
'description': 'A dedicated space for DNB/Turbo House and 4x4.',
|
||||
'id': 'esk',
|
||||
'title': 'Esk',
|
||||
'description': 'md5:5893d7c1d411ae8dea7fba12f109aa98',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _entries(self, data):
|
||||
for episode in traverse_obj(data, (
|
||||
'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
|
||||
):
|
||||
yield self._parse_entry(episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
data = self._search_nextjs_data(webpage, playlist_id)
|
||||
|
||||
api_data = self._download_json(
|
||||
f'{self._API_BASE}/shows/{playlist_id}', playlist_id,
|
||||
note='Downloading show data from API')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), playlist_id, title, description=description)
|
||||
self._entries(api_data), playlist_id,
|
||||
**traverse_obj(api_data, ('entry', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
})))
|
||||
|
||||
@@ -1064,7 +1064,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
_VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?:soundcloud(?:%3A|:)playlists(?:%3A|:))?(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.soundcloud.com/playlists/4110309',
|
||||
@@ -1079,6 +1079,12 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud%3Aplaylists%3A1759227795',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.soundcloud.com/playlists/soundcloud:playlists:2104769627?secret_token=s-wmpCLuExeYX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .zype import ZypeIE
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
try_call,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ThisOldHouseIE(InfoExtractor):
|
||||
@@ -77,46 +76,43 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
|
||||
urlh = self._request_webpage(
|
||||
'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
|
||||
errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
|
||||
login_page = self._download_webpage(
|
||||
'https://www.thisoldhouse.com/insider-login', None, 'Downloading login page')
|
||||
hidden_inputs = self._hidden_inputs(login_page)
|
||||
response = self._download_json(
|
||||
'https://www.thisoldhouse.com/wp-admin/admin-ajax.php', None, 'Logging in',
|
||||
headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=urlencode_postdata(filter_dict({
|
||||
'action': 'onebill_subscriber_login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'pricingPlanTerm': hidden_inputs['pricing_plan_term'],
|
||||
'utm_parameters': hidden_inputs.get('utm_parameters'),
|
||||
'nonce': hidden_inputs['mdcr_onebill_login_nonce'],
|
||||
})))
|
||||
|
||||
try:
|
||||
auth_form = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Submitting credentials', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': urlh.url,
|
||||
}, data=json.dumps(filter_dict({
|
||||
**{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
|
||||
'tenant': 'thisoldhouse',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'popup_options': {},
|
||||
'sso': True,
|
||||
'_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
|
||||
'_intstate': 'deprecated',
|
||||
}), separators=(',', ':')).encode())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
message = traverse_obj(response, ('data', 'message', {str}))
|
||||
if not response['success']:
|
||||
if message and 'Something went wrong' in message:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(
|
||||
'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
|
||||
data=urlencode_postdata(self._hidden_inputs(auth_form)))
|
||||
raise ExtractorError(message or 'Login was unsuccessful')
|
||||
if message and 'Your subscription is not active' in message:
|
||||
self.report_warning(
|
||||
f'{self.IE_NAME} said your subscription is not active. '
|
||||
f'If your subscription is active, this could be caused by too many sign-ins, '
|
||||
f'and you should instead try using {self._login_hint(method="cookies")[4:]}')
|
||||
else:
|
||||
self.write_debug(f'{self.IE_NAME} said: {message}')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if 'To Unlock This content' in webpage:
|
||||
self.raise_login_required(
|
||||
'This video is only available for subscribers. '
|
||||
'Note that --cookies-from-browser may not work due to this site using session cookies')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
# If login response says inactive subscription, site redirects to frontpage for Insider content
|
||||
if 'To Unlock This content' in webpage or urllib.parse.urlparse(urlh.url).path in ('', '/'):
|
||||
self.raise_login_required('This video is only available for subscribers')
|
||||
|
||||
video_url, video_id = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||
|
||||
67
yt_dlp/extractor/yfanefa.py
Normal file
67
yt_dlp/extractor/yfanefa.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YfanefaIE(InfoExtractor):
    # Handles yfanefa.com pages; the media URL and basic metadata are read
    # from the `iwPlayer.options[...]` JSON assignment embedded in the webpage.
    IE_NAME = 'yfanefa'
    _VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'https://www.yfanefa.com/record/2717',
        'info_dict': {
            'id': 'record-2717',
            'ext': 'mp4',
            'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
            'duration': 5239,
            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
        },
    }, {
        'url': 'https://www.yfanefa.com/news/53',
        'info_dict': {
            'id': 'news-53',
            'ext': 'mp4',
            'title': 'Memory Bank: Bradford Launch',
            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
        },
    }, {
        'url': 'https://www.yfanefa.com/evaluating_nature_matters',
        'info_dict': {
            'id': 'evaluating_nature_matters',
            'ext': 'mp4',
            'title': 'Evaluating Nature Matters',
            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        options = self._search_json(
            r'iwPlayer\.options\["[\w.]+"\]\s*=', page, 'player options', video_id)

        # The optional signature is appended to the media URL with no separator
        media_url = join_nonempty(options['url'], options.get('signature'), delim='')
        if determine_ext(media_url) != 'm3u8':
            formats = [{'url': media_url, 'ext': 'mp4'}]
        else:
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', m3u8_id='hls')

        # Prefer the OpenGraph title; otherwise fall back to the <title> tag
        # with the site-name suffix removed
        title = self._og_search_title(page, default=None)
        if not title:
            title = remove_end(self._html_extract_title(page), ' | Yorkshire Film Archive')

        return {
            # The matched path may contain slashes (e.g. "record/2717");
            # flatten it into a dash-separated ID
            'id': video_id.strip('/').replace('/', '-'),
            'title': title,
            'formats': formats,
            **traverse_obj(options, {
                'thumbnail': ('preview', {url_or_none}),
                'duration': ('duration', {int_or_none}),
            }),
        }
|
||||
@@ -3150,6 +3150,9 @@ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url,
|
||||
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
||||
'Use formats=duplicate extractor argument instead')
|
||||
|
||||
def is_super_resolution(f_url):
|
||||
return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...))
|
||||
|
||||
def solve_sig(s, spec):
|
||||
return ''.join(s[i] for i in spec)
|
||||
|
||||
@@ -3202,7 +3205,7 @@ def get_language_code_and_preference(fmt_stream):
|
||||
def get_stream_id(fmt_stream):
|
||||
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||||
|
||||
def process_format_stream(fmt_stream, proto, missing_pot):
|
||||
def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False):
|
||||
itag = str_or_none(fmt_stream.get('itag'))
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
quality = fmt_stream.get('quality')
|
||||
@@ -3253,10 +3256,13 @@ def process_format_stream(fmt_stream, proto, missing_pot):
|
||||
dct = {
|
||||
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||||
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
||||
'format_id': join_nonempty(itag, (
|
||||
'drc' if fmt_stream.get('isDrc')
|
||||
else 'sr' if super_resolution
|
||||
else None)),
|
||||
'format_note': join_nonempty(
|
||||
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
||||
name, fmt_stream.get('isDrc') and 'DRC',
|
||||
name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled',
|
||||
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
||||
@@ -3342,7 +3348,9 @@ def process_https_formats():
|
||||
self.report_warning(msg, video_id, only_once=True)
|
||||
continue
|
||||
|
||||
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
|
||||
fmt = process_format_stream(
|
||||
fmt_stream, proto, missing_pot=require_po_token and not po_token,
|
||||
super_resolution=is_super_resolution(fmt_url))
|
||||
if not fmt:
|
||||
continue
|
||||
|
||||
|
||||
@@ -305,6 +305,8 @@ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
|
||||
status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
|
||||
|
||||
def read(self, amt=None):
|
||||
if self.closed:
|
||||
return b''
|
||||
try:
|
||||
data = self.fp.read(amt)
|
||||
underlying = getattr(self.fp, 'fp', None)
|
||||
|
||||
@@ -192,7 +192,10 @@ def _probe_version(self):
|
||||
|
||||
@property
|
||||
def available(self):
|
||||
return bool(self._ffmpeg_location.get()) or self.basename is not None
|
||||
# If we return that ffmpeg is available, then the basename property *must* be run
|
||||
# (as doing so has side effects), and its value can never be None
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/12829
|
||||
return self.basename is not None
|
||||
|
||||
@property
|
||||
def executable(self):
|
||||
|
||||
Reference in New Issue
Block a user