Initial plan

[ie/floatplane] Add subtitle support (#15069)
Authored by: seproDev
2025-12-16 06:05:41 +07:00 · 2025-11-15 19:16:16 +00:00 · 2025-11-15 17:22:17 +01:00 · 2025-11-15 02:28:57 +01:00 · 2025-11-15 01:43:53 +01:00 · 2025-11-14 20:17:30 +01:00
6 changed files with 150 additions and 17 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -268,6 +268,7 @@
    BitChuteChannelIE,
    BitChuteIE,
 )
+from .bitmovin import BitmovinIE
 from .blackboardcollaborate import (
    BlackboardCollaborateIE,
    BlackboardCollaborateLaunchIE,
@@ -1368,6 +1369,7 @@
    NovaIE,
 )
 from .novaplay import NovaPlayIE
+from .nowcanal import NowCanalIE
 from .nowness import (
    NownessIE,
    NownessPlaylistIE,
--- a/yt_dlp/extractor/bitmovin.py
+++ b/yt_dlp/extractor/bitmovin.py
@@ -0,0 +1,74 @@
+import re
+
+from .common import InfoExtractor
+from ..utils.traversal import traverse_obj
+
+
+class BitmovinIE(InfoExtractor):
+    _VALID_URL = r'https?://streams\.bitmovin\.com/(?P<id>\w+)'
+    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//streams\.bitmovin\.com/(?P<id>\w+)[^"\']+)']
+    _TESTS = [{
+        'url': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/embed',
+        'info_dict': {
+            'id': 'cqkl1t5giv3lrce7pjbg',
+            'ext': 'mp4',
+            'title': 'Developing Osteopathic Residents as Faculty',
+            'thumbnail': 'https://streams.bitmovin.com/cqkl1t5giv3lrce7pjbg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/share',
+        'info_dict': {
+            'id': 'cgl9rh94uvs51rqc8jhg',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny (Streams Docs)',
+            'thumbnail': 'https://streams.bitmovin.com/cgl9rh94uvs51rqc8jhg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+    _WEBPAGE_TESTS = [{
+        # bitmovin-stream web component
+        'url': 'https://www.institutionalinvestor.com/article/2bsw1in1l9k68mp9kritc/video-war-stories-over-board-games/best-case-i-get-fired-war-stories',
+        'info_dict': {
+            'id': 'cuiumeil6g115lc4li3g',
+            'ext': 'mp4',
+            'title': '[media] War Stories over Board Games: ‚ÄúBest Case: I Get Fired‚Äù ',
+            'thumbnail': 'https://streams.bitmovin.com/cuiumeil6g115lc4li3g/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # iframe embed
+        'url': 'https://www.clearblueionizer.com/en/pool-ionizers/mineral-pool-vs-saltwater-pool/',
+        'info_dict': {
+            'id': 'cvpvfsm1pf7itg7cfvtg',
+            'ext': 'mp4',
+            'title': 'Pool Ionizer vs. Salt Chlorinator',
+            'thumbnail': 'https://streams.bitmovin.com/cvpvfsm1pf7itg7cfvtg/poster',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        yield from super()._extract_embed_urls(url, webpage)
+        for stream_id in re.findall(r'<bitmovin-stream\b[^>]*\bstream-id=["\'](?P<id>\w+)', webpage):
+            yield f'https://streams.bitmovin.com/{stream_id}'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_config = self._download_json(
+            f'https://streams.bitmovin.com/{video_id}/config', video_id)['sources']
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            player_config['hls'], video_id, 'mp4')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(player_config, {
+                'title': ('title', {str}),
+                'thumbnail': ('poster', {str}),
+            }),
+        }
--- a/yt_dlp/extractor/floatplane.py
+++ b/yt_dlp/extractor/floatplane.py
@@ -109,6 +109,17 @@ def _real_extract(self, url):
                    'hls_media_playlist_data': m3u8_data,
                    'hls_aes': hls_aes or None,
                })
+
+            subtitles = {}
+            automatic_captions = {}
+            for sub_data in traverse_obj(metadata, ('textTracks', lambda _, v: url_or_none(v['src']))):
+                sub_lang = sub_data.get('language') or 'en'
+                sub_entry = {'url': sub_data['src']}
+                if sub_data.get('generated'):
+                    automatic_captions.setdefault(sub_lang, []).append(sub_entry)
+                else:
+                    subtitles.setdefault(sub_lang, []).append(sub_entry)
+
            items.append({
                **common_info,
                'id': media_id,
@@ -118,6 +129,8 @@ def _real_extract(self, url):
                    'thumbnail': ('thumbnail', 'path', {url_or_none}),
                }),
                'formats': formats,
+                'subtitles': subtitles,
+                'automatic_captions': automatic_captions,
            })

        post_info = {
--- a/yt_dlp/extractor/jtbc.py
+++ b/yt_dlp/extractor/jtbc.py
@@ -98,7 +98,7 @@ def _real_extract(self, url):

        formats = []
        for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
-            stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
+            stream_url = re.sub(r'/playlist_pd\d+\.m3u8', '/playlist.m3u8', stream_url)
            formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))

        metadata = self._download_json(
--- a/yt_dlp/extractor/nowcanal.py
+++ b/yt_dlp/extractor/nowcanal.py
@@ -0,0 +1,37 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+
+class NowCanalIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nowcanal\.pt(?:/[\w-]+)+/detalhe/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.nowcanal.pt/ultimas/detalhe/pedro-sousa-hjulmand-pode-ter-uma-saida-limpa-do-sporting-daqui-a-um-ano',
+        'md5': '047f17cb783e66e467d703e704bbc95d',
+        'info_dict': {
+            'id': '6376598467112',
+            'ext': 'mp4',
+            'title': 'Pedro Sousa «Hjulmand pode ter uma saída limpa do Sporting daqui a um ano»',
+            'description': '',
+            'uploader_id': '6108484330001',
+            'duration': 65.237,
+            'thumbnail': r're:^https://.+\.jpg',
+            'timestamp': 1754440620,
+            'upload_date': '20250806',
+            'tags': ['now'],
+        },
+    }, {
+        'url': 'https://www.nowcanal.pt/programas/frente-a-frente/detalhe/frente-a-frente-eva-cruzeiro-ps-e-rita-matias-chega',
+        'only_matching': True,
+    }]
+
+    _BC_URL_TMPL = 'https://players.brightcove.net/6108484330001/chhIqzukMq_default/index.html?videoId={}'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_json(
+            r'videoHandler\.addBrightcoveVideoWithJson\(\[',
+            webpage, 'video data', display_id)['brightcoveVideoId']
+
+        return self.url_result(self._BC_URL_TMPL.format(video_id), BrightcoveNewIE)
--- a/yt_dlp/extractor/rinsefm.py
+++ b/yt_dlp/extractor/rinsefm.py
@@ -3,12 +3,14 @@
    MEDIA_EXTENSIONS,
    determine_ext,
    parse_iso8601,
-    traverse_obj,
    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class RinseFMBaseIE(InfoExtractor):
+    _API_BASE = 'https://rinse.fm/api/query/v1'
+
    @staticmethod
    def _parse_entry(entry):
        return {
@@ -45,8 +47,10 @@ class RinseFMIE(RinseFMBaseIE):

    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
+
+        entry = self._download_json(
+            f'{self._API_BASE}/episodes/{display_id}', display_id,
+            note='Downloading episode data from API')['entry']

        return self._parse_entry(entry)

@@ -58,32 +62,35 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE):
        'info_dict': {
            'id': 'resources',
            'title': '[re]sources',
-            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
+            'description': 'md5:fd6a7254e8273510e6d49fbf50edf392',
        },
        'playlist_mincount': 40,
    }, {
-        'url': 'https://rinse.fm/shows/ivy/',
+        'url': 'https://www.rinse.fm/shows/esk',
        'info_dict': {
-            'id': 'ivy',
-            'title': '[IVY]',
-            'description': 'A dedicated space for DNB/Turbo House and 4x4.',
+            'id': 'esk',
+            'title': 'Esk',
+            'description': 'md5:5893d7c1d411ae8dea7fba12f109aa98',
        },
-        'playlist_mincount': 7,
+        'playlist_mincount': 139,
    }]

    def _entries(self, data):
        for episode in traverse_obj(data, (
-            'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
+            'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio),
        ):
            yield self._parse_entry(episode)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
-        description = self._og_search_description(webpage) or self._html_search_meta(
-            'description', webpage)
-        data = self._search_nextjs_data(webpage, playlist_id)
+
+        api_data = self._download_json(
+            f'{self._API_BASE}/shows/{playlist_id}', playlist_id,
+            note='Downloading show data from API')

        return self.playlist_result(
-            self._entries(data), playlist_id, title, description=description)
+            self._entries(api_data), playlist_id,
+            **traverse_obj(api_data, ('entry', {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+            })))
Author	SHA1	Message	Date
copilot-swe-agent[bot]	21ec21c936	Initial plan	2025-11-15 19:16:16 +00:00
sepro	b333ef1b3f	[ie/floatplane] Add subtitle support (#15069) Authored by: seproDev	2025-11-15 17:22:17 +01:00
Pedro Ferreira	4e680db150	[ie/NowCanal] Add extractor (#14584) Authored by: pferreir	2025-11-15 02:28:57 +01:00
sepro	45a3b42bb9	[ie/Bitmovin] Add extractor (#15064) Authored by: seproDev	2025-11-15 01:43:53 +01:00
Omar Merroun	d6aa8c235d	[ie/rinsefm] Fix extractors (#15020) Closes #14626 Authored by: 1bnBattuta, seproDev Co-authored-by: sepro <sepro@sepr0.com>	2025-11-14 20:17:30 +01:00
sepro	947e788340	[ie/jtbc] Fix extractor (#15047) Authored by: seproDev	2025-11-14 18:42:18 +01:00