[ie/TheChosen] Add extractors (#14183)

Closes #11246. Authored by: mrFlamel
[ie/mave:channel] Add extractor (#14915)
2025-12-16 06:05:41 +07:00 · 2025-11-17 00:17:55 +01:00 · 2025-11-17 00:05:44 +01:00 · 2025-11-16 22:39:22 +00:00 · 2025-11-16 19:07:48 +00:00 · 2025-11-16 18:15:16 +00:00
15 changed files with 566 additions and 61 deletions
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -755,6 +755,17 @@ def test_partial_read_then_full_read(self, handler):
                assert res.read(0) == b''
                assert res.read() == b'<video src="/vid.mp4" /></html>'

+    def test_partial_read_greater_than_response_then_full_read(self, handler):
+        with handler() as rh:
+            for encoding in ('', 'gzip', 'deflate'):
+                res = validate_and_send(rh, Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': encoding}))
+                assert res.headers.get('Content-Encoding') == encoding
+                assert res.read(512) == b'<html><video src="/vid.mp4" /></html>'
+                assert res.read(0) == b''
+                assert res.read() == b''
+

@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
@@ -920,6 +931,28 @@ def test_http_response_auto_close(self, handler):
            assert res.fp.fp is None
            assert res.closed

+    def test_data_uri_partial_read_then_full_read(self, handler):
+        with handler() as rh:
+            res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
+            assert res.read(6) == b'hello '
+            assert res.read(0) == b''
+            assert res.read() == b'world'
+            # Should automatically close the underlying file object
+            assert res.fp.closed
+            assert res.closed
+
+    def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
+        with handler() as rh:
+            res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
+            assert res.read(512) == b'hello world'
+            # Response and its underlying file object should already be closed now
+            assert res.fp.closed
+            assert res.closed
+            assert res.read(0) == b''
+            assert res.read() == b''
+            assert res.fp.closed
+            assert res.closed
+
    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@@ -40,7 +40,7 @@

 pytestmark = pytest.mark.handler_flaky(
    'Websockets',
-    os.name != 'nt' and sys.implementation.name == 'pypy',
+    os.name == 'nt' or sys.implementation.name == 'pypy',
    reason='segfaults',
 )

--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -461,7 +461,8 @@ def download(self, filename, info_dict, subtitle=False):
            min_sleep_interval = self.params.get('sleep_interval') or 0
            max_sleep_interval = self.params.get('max_sleep_interval') or 0

-            if available_at := info_dict.get('available_at'):
+            requested_formats = info_dict.get('requested_formats') or [info_dict]
+            if available_at := max(f.get('available_at') or 0 for f in requested_formats):
                forced_sleep_interval = available_at - int(time.time())
                if forced_sleep_interval > min_sleep_interval:
                    sleep_note = 'as required by the site'
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -268,6 +268,7 @@
    BitChuteChannelIE,
    BitChuteIE,
 )
+from .bitmovin import BitmovinIE
 from .blackboardcollaborate import (
    BlackboardCollaborateIE,
    BlackboardCollaborateLaunchIE,
@@ -690,6 +691,10 @@
    FrontendMastersIE,
    FrontendMastersLessonIE,
 )
+from .frontro import (
+    TheChosenGroupIE,
+    TheChosenIE,
+)
 from .fujitv import FujiTVFODPlus7IE
 from .funk import FunkIE
 from .funker530 import Funker530IE
@@ -1093,7 +1098,10 @@
 from .massengeschmacktv import MassengeschmackTVIE
 from .masters import MastersIE
 from .matchtv import MatchTVIE
-from .mave import MaveIE
+from .mave import (
+    MaveChannelIE,
+    MaveIE,
+)
 from .mbn import MBNIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
@@ -1368,6 +1376,7 @@
    NovaIE,
 )
 from .novaplay import NovaPlayIE
+from .nowcanal import NowCanalIE
 from .nowness import (
    NownessIE,
    NownessPlaylistIE,
@@ -2521,6 +2530,7 @@
    YappyIE,
    YappyProfileIE,
 )
+from .yfanefa import YfanefaIE
 from .yle_areena import YleAreenaIE
 from .youjizz import YouJizzIE
 from .youku import (
--- a/yt_dlp/extractor/bitmovin.py
+++ b/yt_dlp/extractor/bitmovin.py
@@ -0,0 +1,74 @@
+import re
+
+from .common import InfoExtractor
+from ..utils.traversal import traverse_obj
+
+
+class BitmovinIE(InfoExtractor):
+    _VALID_URL = r'https?://streams\.bitmovin\.com/(?P<id>\w+)'
+    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//streams\.bitmovin\.com/(?P<id>\w+)[^"\']+)']
+    _TESTS = [{
+        'url': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/embed',
+        'info_dict': {
+            'id': 'cqkl1t5giv3lrce7pjbg',
+            'ext': 'mp4',
+            'title': 'Developing Osteopathic Residents as Faculty',
+            'thumbnail': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/share',
+        'info_dict': {
+            'id': 'cgl9rh94uvs51rqc8jhg',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny (Streams Docs)',
+            'thumbnail': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+    _WEBPAGE_TESTS = [{
+        # bitmovin-stream web component
+        'url': 'https://www.institutionalinvestor.com/article/2bsw1in1l9k68mp9kritc/video-war-stories-over-board-games/best-case-i-get-fired-war-stories',
+        'info_dict': {
+            'id': 'cuiumeil6g115lc4li3g',
+            'ext': 'mp4',
+            'title': '[media] War Stories over Board Games: “Best Case: I Get Fired” ',
+            'thumbnail': 'https://streams.bitmovin.com/cuiumeil6g115lc4li3g/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # iframe embed
+        'url': 'https://www.clearblueionizer.com/en/pool-ionizers/mineral-pool-vs-saltwater-pool/',
+        'info_dict': {
+            'id': 'cvpvfsm1pf7itg7cfvtg',
+            'ext': 'mp4',
+            'title': 'Pool Ionizer vs. Salt Chlorinator',
+            'thumbnail': 'https://streams.bitmovin.com/cvpvfsm1pf7itg7cfvtg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        yield from super()._extract_embed_urls(url, webpage)
+        for stream_id in re.findall(r'<bitmovin-stream\b[^>]*\bstream-id=["\'](?P<id>\w+)', webpage):
+            yield f'https://streams.bitmovin.com/{stream_id}'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_config = self._download_json(
+            f'https://streams.bitmovin.com/{video_id}/config', video_id)['sources']
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            player_config['hls'], video_id, 'mp4')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(player_config, {
+                'title': ('title', {str}),
+                'thumbnail': ('poster', {str}),
+            }),
+        }
--- a/yt_dlp/extractor/floatplane.py
+++ b/yt_dlp/extractor/floatplane.py
@@ -109,6 +109,17 @@ def _real_extract(self, url):
                    'hls_media_playlist_data': m3u8_data,
                    'hls_aes': hls_aes or None,
                })
+
+            subtitles = {}
+            automatic_captions = {}
+            for sub_data in traverse_obj(metadata, ('textTracks', lambda _, v: url_or_none(v['src']))):
+                sub_lang = sub_data.get('language') or 'en'
+                sub_entry = {'url': sub_data['src']}
+                if sub_data.get('generated'):
+                    automatic_captions.setdefault(sub_lang, []).append(sub_entry)
+                else:
+                    subtitles.setdefault(sub_lang, []).append(sub_entry)
+
            items.append({
                **common_info,
                'id': media_id,
@@ -118,6 +129,8 @@ def _real_extract(self, url):
                    'thumbnail': ('thumbnail', 'path', {url_or_none}),
                }),
                'formats': formats,
+                'subtitles': subtitles,
+                'automatic_captions': automatic_captions,
            })

        post_info = {
--- a/yt_dlp/extractor/frontro.py
+++ b/yt_dlp/extractor/frontro.py
@@ -0,0 +1,164 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class FrontoBaseIE(InfoExtractor):
+    def _get_auth_headers(self, url):
+        return traverse_obj(self._get_cookies(url), {
+            'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}),
+        })
+
+
+class FrontroVideoBaseIE(FrontoBaseIE):
+    _CHANNEL_ID = None
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        metadata = self._download_json(
+            'https://api.frontrow.cc/query', video_id, data=json.dumps({
+                'operationName': 'Video',
+                'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
+                'query': '''query Video($channelID: ID!, $videoID: ID!) {
+                    video(ChannelID: $channelID, VideoID: $videoID) {
+                        ... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
+                    }
+                }''',
+            }).encode(), headers={
+                'content-type': 'application/json',
+                **self._get_auth_headers(url),
+            })['data']['video']
+        if not traverse_obj(metadata, 'hasAccess'):
+            self.raise_login_required()
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(metadata, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'thumbnail': ('thumbnail', {url_or_none}),
+                'timestamp': ('createdAt', {parse_iso8601}),
+                'modified_timestamp': ('updatedAt', {parse_iso8601}),
+                'duration': ('duration', {int_or_none}),
+                'like_count': ('likeCount', {int_or_none}),
+                'comment_count': ('comments', {int_or_none}),
+                'view_count': ('views', {int_or_none}),
+            }),
+        }
+
+
+class FrontroGroupBaseIE(FrontoBaseIE):
+    _CHANNEL_ID = None
+    _VIDEO_EXTRACTOR = None
+    _VIDEO_URL_TMPL = None
+
+    def _real_extract(self, url):
+        group_id = self._match_id(url)
+
+        metadata = self._download_json(
+            'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
+            data=json.dumps({
+                'operationName': 'PaginatedStaticPageContainer',
+                'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
+                'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
+                              pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
+                                ... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
+                                        id contentItem { ... on ItemVideo { videoItem: item {
+                                            id
+                                        }}}
+                                    }}}
+                                }
+                              }
+                            }''',
+            }).encode(), headers={
+                'content-type': 'application/json',
+                **self._get_auth_headers(url),
+            })['data']['pageContainer']
+
+        entries = []
+        for video_id in traverse_obj(metadata, (
+            'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}),
+        ):
+            entries.append(self.url_result(
+                self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id))
+
+        return {
+            '_type': 'playlist',
+            'id': group_id,
+            'entries': entries,
+            **traverse_obj(metadata, {
+                'title': ('title', {str}),
+                'timestamp': ('createdAt', {parse_iso8601}),
+                'modified_timestamp': ('updatedAt', {parse_iso8601}),
+            }),
+        }
+
+
+class TheChosenIE(FrontroVideoBaseIE):
+    _CHANNEL_ID = '12884901895'
+
+    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://watch.thechosen.tv/video/184683594325',
+        'md5': '3f878b689588c71b38ec9943c54ff5b0',
+        'info_dict': {
+            'id': '184683594325',
+            'ext': 'mp4',
+            'title': 'Season 3 Episode 2: Two by Two',
+            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 4212,
+            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
+            'timestamp': 1698954546,
+            'upload_date': '20231102',
+            'modified_timestamp': int,
+            'modified_date': str,
+        },
+    }, {
+        'url': 'https://watch.thechosen.tv/video/184683596189',
+        'md5': 'd581562f9d29ce82f5b7770415334151',
+        'info_dict': {
+            'id': '184683596189',
+            'ext': 'mp4',
+            'title': 'Season 4 Episode 8: Humble',
+            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 5092,
+            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
+            'timestamp': 1715019474,
+            'upload_date': '20240506',
+            'modified_timestamp': int,
+            'modified_date': str,
+        },
+    }]
+
+
+class TheChosenGroupIE(FrontroGroupBaseIE):
+    _CHANNEL_ID = '12884901895'
+    _VIDEO_EXTRACTOR = TheChosenIE
+    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
+
+    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://watch.thechosen.tv/group/309237658592',
+        'info_dict': {
+            'id': '309237658592',
+            'title': 'Season 3',
+            'timestamp': 1746203969,
+            'upload_date': '20250502',
+            'modified_timestamp': int,
+            'modified_date': str,
+        },
+        'playlist_count': 8,
+    }]
--- a/yt_dlp/extractor/jtbc.py
+++ b/yt_dlp/extractor/jtbc.py
@@ -98,7 +98,7 @@ def _real_extract(self, url):

        formats = []
        for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
-            stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
+            stream_url = re.sub(r'/playlist_pd\d+\.m3u8', '/playlist.m3u8', stream_url)
            formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))

        metadata = self._download_json(
--- a/yt_dlp/extractor/mave.py
+++ b/yt_dlp/extractor/mave.py
@@ -1,7 +1,9 @@
-import re
+import functools
+import math

 from .common import InfoExtractor
 from ..utils import (
+    InAdvancePagedList,
    clean_html,
    int_or_none,
    parse_iso8601,
@@ -10,15 +12,64 @@
 from ..utils.traversal import require, traverse_obj


-class MaveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
+class MaveBaseIE(InfoExtractor):
+    _API_BASE_URL = 'https://api.mave.digital/v1/website'
+    _API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'
+
+    def _load_channel_meta(self, channel_id, display_id):
+        return traverse_obj(self._download_json(
+            f'{self._API_BASE_URL}/{channel_id}/', display_id,
+            note='Downloading channel metadata'), 'podcast')
+
+    def _load_episode_meta(self, channel_id, episode_code, display_id):
+        return self._download_json(
+            f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}',
+            display_id, note='Downloading episode metadata')
+
+    def _create_entry(self, channel_id, channel_meta, episode_meta):
+        episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))
+        return {
+            'display_id': f'{channel_id}-{episode_code}',
+            'extractor_key': MaveIE.ie_key(),
+            'extractor': MaveIE.IE_NAME,
+            'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
+            'channel_id': channel_id,
+            'channel_url': f'https://{channel_id}.mave.digital/',
+            'vcodec': 'none',
+            **traverse_obj(episode_meta, {
+                'id': ('id', {str}),
+                'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
+                'title': ('title', {str}),
+                'description': ('description', {clean_html}),
+                'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
+                'duration': ('duration', {int_or_none}),
+                'season_number': ('season', {int_or_none}),
+                'episode_number': ('number', {int_or_none}),
+                'view_count': ('listenings', {int_or_none}),
+                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
+                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
+                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
+                'timestamp': ('publish_date', {parse_iso8601}),
+            }),
+            **traverse_obj(channel_meta, {
+                'series_id': ('id', {str}),
+                'series': ('title', {str}),
+                'channel': ('title', {str}),
+                'uploader': ('author', {str}),
+            }),
+        }
+
+
+class MaveIE(MaveBaseIE):
+    IE_NAME = 'mave'
+    _VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
    _TESTS = [{
        'url': 'https://ochenlichnoe.mave.digital/ep-25',
        'md5': 'aa3e513ef588b4366df1520657cbc10c',
        'info_dict': {
            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
            'ext': 'mp3',
-            'display_id': 'ochenlichnoe-ep-25',
+            'display_id': 'ochenlichnoe-25',
            'title': 'Между мной и миром: психология самооценки',
            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
            'uploader': 'Самарский университет',
@@ -45,7 +96,7 @@ class MaveIE(InfoExtractor):
        'info_dict': {
            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
            'ext': 'mp3',
-            'display_id': 'budem-ep-12',
+            'display_id': 'budem-12',
            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
            'uploader': 'Полина Цветкова+Евгения Акопова',
@@ -68,40 +119,72 @@ class MaveIE(InfoExtractor):
            'upload_date': '20241230',
        },
    }]
-    _API_BASE_URL = 'https://api.mave.digital/'

    def _real_extract(self, url):
-        channel_id, slug = self._match_valid_url(url).group('channel', 'id')
-        display_id = f'{channel_id}-{slug}'
-        webpage = self._download_webpage(url, display_id)
-        data = traverse_obj(
-            self._search_nuxt_json(webpage, display_id),
-            ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
+        channel_id, episode_code = self._match_valid_url(url).group(
+            'channel_id', 'episode_code')
+        display_id = f'{channel_id}-{episode_code}'
+
+        channel_meta = self._load_channel_meta(channel_id, display_id)
+        episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)
+
+        return self._create_entry(channel_id, channel_meta, episode_meta)
+
+
+class MaveChannelIE(MaveBaseIE):
+    IE_NAME = 'mave:channel'
+    _VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://budem.mave.digital/',
+        'info_dict': {
+            'id': 'budem',
+            'title': 'Все там будем',
+            'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
+        },
+        'playlist_mincount': 15,
+    }, {
+        'url': 'https://ochenlichnoe.mave.digital/',
+        'info_dict': {
+            'id': 'ochenlichnoe',
+            'title': 'Очень личное',
+            'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'https://geekcity.mave.digital/',
+        'info_dict': {
+            'id': 'geekcity',
+            'title': 'Мужчины в трико',
+            'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
+        },
+        'playlist_mincount': 80,
+    }]
+    _PAGE_SIZE = 50
+
+    def _entries(self, channel_id, channel_meta, page_num):
+        page_data = self._download_json(
+            f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={
+                'view': 'all',
+                'page': page_num + 1,
+                'sort': 'newest',
+                'format': 'all',
+            }, note=f'Downloading page {page_num + 1}')
+        for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])):
+            yield self._create_entry(channel_id, channel_meta, ep)
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        channel_meta = self._load_channel_meta(channel_id, channel_id)

        return {
-            'display_id': display_id,
-            'channel_id': channel_id,
-            'channel_url': f'https://{channel_id}.mave.digital/',
-            'vcodec': 'none',
-            'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
-            **traverse_obj(data, ('activeEpisodeData', {
-                'url': ('audio', {urljoin(self._API_BASE_URL)}),
-                'id': ('id', {str}),
+            '_type': 'playlist',
+            'id': channel_id,
+            **traverse_obj(channel_meta, {
                'title': ('title', {str}),
-                'description': ('description', {clean_html}),
-                'duration': ('duration', {int_or_none}),
-                'season_number': ('season', {int_or_none}),
-                'episode_number': ('number', {int_or_none}),
-                'view_count': ('listenings', {int_or_none}),
-                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
-                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
-                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
-                'timestamp': ('publish_date', {parse_iso8601}),
-            })),
-            **traverse_obj(data, ('podcast', 'podcast', {
-                'series_id': ('id', {str}),
-                'series': ('title', {str}),
-                'channel': ('title', {str}),
-                'uploader': ('author', {str}),
-            })),
+                'description': ('description', {str}),
+            }),
+            'entries': InAdvancePagedList(
+                functools.partial(self._entries, channel_id, channel_meta),
+                math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
        }
--- a/yt_dlp/extractor/nowcanal.py
+++ b/yt_dlp/extractor/nowcanal.py
@@ -0,0 +1,37 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+
+class NowCanalIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nowcanal\.pt(?:/[\w-]+)+/detalhe/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.nowcanal.pt/ultimas/detalhe/pedro-sousa-hjulmand-pode-ter-uma-saida-limpa-do-sporting-daqui-a-um-ano',
+        'md5': '047f17cb783e66e467d703e704bbc95d',
+        'info_dict': {
+            'id': '6376598467112',
+            'ext': 'mp4',
+            'title': 'Pedro Sousa «Hjulmand pode ter uma saída limpa do Sporting daqui a um ano»',
+            'description': '',
+            'uploader_id': '6108484330001',
+            'duration': 65.237,
+            'thumbnail': r're:^https://.+\.jpg',
+            'timestamp': 1754440620,
+            'upload_date': '20250806',
+            'tags': ['now'],
+        },
+    }, {
+        'url': 'https://www.nowcanal.pt/programas/frente-a-frente/detalhe/frente-a-frente-eva-cruzeiro-ps-e-rita-matias-chega',
+        'only_matching': True,
+    }]
+
+    _BC_URL_TMPL = 'https://players.brightcove.net/6108484330001/chhIqzukMq_default/index.html?videoId={}'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_json(
+            r'videoHandler\.addBrightcoveVideoWithJson\(\[',
+            webpage, 'video data', display_id)['brightcoveVideoId']
+
+        return self.url_result(self._BC_URL_TMPL.format(video_id), BrightcoveNewIE)
--- a/yt_dlp/extractor/rinsefm.py
+++ b/yt_dlp/extractor/rinsefm.py
@@ -3,12 +3,14 @@
    MEDIA_EXTENSIONS,
    determine_ext,
    parse_iso8601,
-    traverse_obj,
    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class RinseFMBaseIE(InfoExtractor):
+    _API_BASE = 'https://rinse.fm/api/query/v1'
+
    @staticmethod
    def _parse_entry(entry):
        return {
@@ -45,8 +47,10 @@ class RinseFMIE(RinseFMBaseIE):

    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
+
+        entry = self._download_json(
+            f'{self._API_BASE}/episodes/{display_id}', display_id,
+            note='Downloading episode data from API')['entry']

        return self._parse_entry(entry)

@@ -58,32 +62,35 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE):
        'info_dict': {
            'id': 'resources',
            'title': '[re]sources',
-            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
+            'description': 'md5:fd6a7254e8273510e6d49fbf50edf392',
        },
        'playlist_mincount': 40,
    }, {
-        'url': 'https://rinse.fm/shows/ivy/',
+        'url': 'https://www.rinse.fm/shows/esk',
        'info_dict': {
-            'id': 'ivy',
-            'title': '[IVY]',
-            'description': 'A dedicated space for DNB/Turbo House and 4x4.',
+            'id': 'esk',
+            'title': 'Esk',
+            'description': 'md5:5893d7c1d411ae8dea7fba12f109aa98',
        },
-        'playlist_mincount': 7,
+        'playlist_mincount': 139,
    }]

    def _entries(self, data):
        for episode in traverse_obj(data, (
-            'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
+            'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
        ):
            yield self._parse_entry(episode)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
-        description = self._og_search_description(webpage) or self._html_search_meta(
-            'description', webpage)
-        data = self._search_nextjs_data(webpage, playlist_id)
+
+        api_data = self._download_json(
+            f'{self._API_BASE}/shows/{playlist_id}', playlist_id,
+            note='Downloading show data from API')

        return self.playlist_result(
-            self._entries(data), playlist_id, title, description=description)
+            self._entries(api_data), playlist_id,
+            **traverse_obj(api_data, ('entry', {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+            })))
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -1064,7 +1064,7 @@ def _real_extract(self, url):


 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
-    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?:soundcloud(?:%3A|:)playlists(?:%3A|:))?(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
@@ -1079,6 +1079,12 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
            'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
        },
        'playlist_count': 6,
+    }, {
+        'url': 'https://api.soundcloud.com/playlists/soundcloud%3Aplaylists%3A1759227795',
+        'only_matching': True,
+    }, {
+        'url': 'https://api.soundcloud.com/playlists/soundcloud:playlists:2104769627?secret_token=s-wmpCLuExeYX',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/yfanefa.py
+++ b/yt_dlp/extractor/yfanefa.py
@@ -0,0 +1,67 @@
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    join_nonempty,
+    remove_end,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class YfanefaIE(InfoExtractor):
+    IE_NAME = 'yfanefa'
+    _VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
+    _TESTS = [{
+        'url': 'https://www.yfanefa.com/record/2717',
+        'info_dict': {
+            'id': 'record-2717',
+            'ext': 'mp4',
+            'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
+            'duration': 5239,
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }, {
+        'url': 'https://www.yfanefa.com/news/53',
+        'info_dict': {
+            'id': 'news-53',
+            'ext': 'mp4',
+            'title': 'Memory Bank:  Bradford Launch',
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }, {
+        'url': 'https://www.yfanefa.com/evaluating_nature_matters',
+        'info_dict': {
+            'id': 'evaluating_nature_matters',
+            'ext': 'mp4',
+            'title': 'Evaluating Nature Matters',
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        player_data = self._search_json(
+            r'iwPlayer\.options\["[\w.]+"\]\s*=', webpage, 'player options', video_id)
+
+        formats = []
+        video_url = join_nonempty(player_data['url'], player_data.get('signature'), delim='')
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', m3u8_id='hls')
+        else:
+            formats = [{'url': video_url, 'ext': 'mp4'}]
+
+        return {
+            'id': video_id.strip('/').replace('/', '-'),
+            'title':
+                self._og_search_title(webpage, default=None)
+                or remove_end(self._html_extract_title(webpage), ' | Yorkshire Film Archive'),
+            'formats': formats,
+            **traverse_obj(player_data, {
+                'thumbnail': ('preview', {url_or_none}),
+                'duration': ('duration', {int_or_none}),
+            }),
+        }
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -3150,6 +3150,9 @@ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url,
            self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                                'Use formats=duplicate extractor argument instead')

+        def is_super_resolution(f_url):  # True if any `xtags` query value of f_url, itself parsed as a query string, has sr=1
+            return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...))
+
        def solve_sig(s, spec):
            return ''.join(s[i] for i in spec)

@@ -3202,7 +3205,7 @@ def get_language_code_and_preference(fmt_stream):
            def get_stream_id(fmt_stream):
                return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')

-            def process_format_stream(fmt_stream, proto, missing_pot):
+            def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False):
                itag = str_or_none(fmt_stream.get('itag'))
                audio_track = fmt_stream.get('audioTrack') or {}
                quality = fmt_stream.get('quality')
@@ -3253,10 +3256,13 @@ def process_format_stream(fmt_stream, proto, missing_pot):
                dct = {
                    'asr': int_or_none(fmt_stream.get('audioSampleRate')),
                    'filesize': int_or_none(fmt_stream.get('contentLength')),
-                    'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
+                    'format_id': join_nonempty(itag, (
+                        'drc' if fmt_stream.get('isDrc')
+                        else 'sr' if super_resolution
+                        else None)),
                    'format_note': join_nonempty(
                        join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
-                        name, fmt_stream.get('isDrc') and 'DRC',
+                        name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled',
                        try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                        try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                        is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
@@ -3342,7 +3348,9 @@ def process_https_formats():
                            self.report_warning(msg, video_id, only_once=True)
                            continue

-                    fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
+                    fmt = process_format_stream(
+                        fmt_stream, proto, missing_pot=require_po_token and not po_token,
+                        super_resolution=is_super_resolution(fmt_url))
                    if not fmt:
                        continue

--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -305,6 +305,8 @@ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
            status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))

    def read(self, amt=None):
+        if self.closed:
+            return b''
        try:
            data = self.fp.read(amt)
            underlying = getattr(self.fp, 'fp', None)
Author	SHA1	Message	Date
Mr Flamel	854fded114	[ie/TheChosen] Add extractors (#14183 ) Closes #11246 Authored by: mrFlamel	2025-11-17 00:17:55 +01:00
Anton Larionov	5f66ac71f6	[ie/mave:channel] Add extractor (#14915 ) Authored by: anlar	2025-11-17 00:05:44 +01:00
bashonly	4cb5e191ef	[ie/youtube] Detect "super resolution" AI-upscaled formats (#15050 ) Closes #14923 Authored by: bashonly	2025-11-16 22:39:22 +00:00
bashonly	6ee6a6fc58	[rh:urllib] Do not read after close (#15049 ) Fix regression introduced in `5767fb4ab1` Closes #15017 Authored by: bashonly	2025-11-16 19:07:48 +00:00
bashonly	23f1ab3469	[fd] Fix playback wait time for ffmpeg downloads (#15066 ) Authored by: bashonly	2025-11-16 18:15:16 +00:00
Haytam001	af285016d2	[ie/yfanefa] Add extractor (#15032 ) Closes #14974 Authored by: Haytam001	2025-11-16 12:02:13 +01:00
sepro	1dd84b9d1c	[ie/SoundcloudPlaylist] Support new API URLs (#15071 ) Closes #15068 Authored by: seproDev	2025-11-16 00:35:00 +01:00
sepro	b333ef1b3f	[ie/floatplane] Add subtitle support (#15069 ) Authored by: seproDev	2025-11-15 17:22:17 +01:00
Pedro Ferreira	4e680db150	[ie/NowCanal] Add extractor (#14584 ) Authored by: pferreir	2025-11-15 02:28:57 +01:00
sepro	45a3b42bb9	[ie/Bitmovin] Add extractor (#15064 ) Authored by: seproDev	2025-11-15 01:43:53 +01:00
Omar Merroun	d6aa8c235d	[ie/rinsefm] Fix extractors (#15020 ) Closes #14626 Authored by: 1bnBattuta, seproDev Co-authored-by: sepro <sepro@sepr0.com>	2025-11-14 20:17:30 +01:00
sepro	947e788340	[ie/jtbc] Fix extractor (#15047 ) Authored by: seproDev	2025-11-14 18:42:18 +01:00