mirror of https://github.com/yt-dlp/yt-dlp synced 2025-12-19 07:35:41 +07:00

Compare commits

11 Commits

f0bc71abf6 [ie/tubitv] Support URLs with locales (#15205)
Closes #15176
Authored by: 0xvd
2025-12-19 00:26:53 +00:00

8a4b626daf [ie/dropbox] Support videos in folders (#15313)
Closes #15312
Authored by: 0xvd
2025-12-19 00:24:13 +00:00

f6dc7d5279 Accept float values for --sleep-subtitles (#15282)
Closes #15269
Authored by: 0xvd
2025-12-18 23:42:50 +00:00

c5e55e0479 [ie/gofile] Fix extractor (#15296)
Authored by: quietvoid
2025-12-18 23:42:13 +00:00

6d4984e64e [ie/nextmedia] Remove extractors (#15354)
Authored by: doe1080
2025-12-18 21:36:15 +00:00

a27ec9efc6 [ie/netzkino] Rework extractor (#15351)
Authored by: doe1080
2025-12-18 21:32:54 +00:00

ff61bef041 [ie/youtube:tab] Fix flat thumbnails extraction for shorts (#15331)
Closes #15329
Authored by: bashonly
2025-12-15 22:37:25 +00:00

04f2ec4b97 [ie/parti] Fix extractors (#15319)
Authored by: seproDev
2025-12-13 20:00:56 +01:00

b6f24745bf [ie/telecinco] Fix extractor (#15311)
Closes #15240
Authored by: 0xvd, bashonly
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-12 22:25:45 +00:00

f2ee2a46fc [ie/pornhub] Optimize metadata extraction (#15231)
Closes #14621
Authored by: norepro
2025-12-12 20:52:09 +00:00

5f37f67d37 [ie/archive.org] Fix metadata extraction (#15286)
Closes #15280
Authored by: bashonly
2025-12-09 19:05:12 +00:00
13 changed files with 119 additions and 374 deletions

test/helper.py

@@ -261,7 +261,7 @@ def sanitize(key, value):
 def expect_info_dict(self, got_dict, expected_dict):
     ALLOWED_KEYS_SORT_ORDER = (
         # NB: Keep in sync with the docstring of extractor/common.py
-        'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type', 'ie_key', 'url',
+        'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
         'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified',
         'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count',
         'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap',

yt_dlp/extractor/_extractors.py

@@ -1312,12 +1312,6 @@
 )
 from .newspicks import NewsPicksIE
 from .newsy import NewsyIE
-from .nextmedia import (
-    AppleDailyIE,
-    NextMediaActionNewsIE,
-    NextMediaIE,
-    NextTVIE,
-)
 from .nexx import (
     NexxEmbedIE,
     NexxIE,

yt_dlp/extractor/archiveorg.py

@@ -279,7 +279,7 @@ def _real_extract(self, url):
             'url': 'https://archive.org/' + track['file'].lstrip('/'),
         }

-        metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
+        metadata = self._download_json(f'https://archive.org/metadata/{identifier}', identifier)
         m = metadata['metadata']
         identifier = m['identifier']

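For context, the endpoint the extractor now hits over HTTPS is the public archive.org Metadata API; a minimal standalone sketch (the item identifier here is an arbitrary example, not something from this commit):

```python
import json
import urllib.request

# Fetch item metadata over HTTPS, as the extractor now does.
# 'aboutbanff' is just an example identifier.
with urllib.request.urlopen('https://archive.org/metadata/aboutbanff') as resp:
    metadata = json.load(resp)
print(metadata['metadata']['identifier'])  # aboutbanff
```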
yt_dlp/extractor/dropbox.py

@@ -14,7 +14,7 @@
 class DropboxIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
     _TESTS = [
         {
             'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
@@ -35,6 +35,9 @@ class DropboxIE(InfoExtractor):
         }, {
             'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
             'only_matching': True,
+        }, {
+            'url': 'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/AOfZzSYWUSKle2HD7XF7kzQ/A-BEAT%20C.mp4?rlkey=6tg3jkp4tv6a5vt58a6dag0mm&dl=0',
+            'only_matching': True,
         },
     ]

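The only functional change is `scl/fi` becoming `scl/f[io]`, so `/scl/fo/` folder links now match too. A quick standalone check of the widened pattern (URLs trimmed from the tests above):

```python
import re

# Pattern copied from the diff above
VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
for url in ('https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4',
            'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/A-BEAT.mp4'):
    print(re.match(VALID_URL, url).group('id'))  # file id, then folder id
```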
yt_dlp/extractor/gofile.py

@@ -46,6 +46,7 @@ class GofileIE(InfoExtractor):
             'videopassword': 'password',
         },
     }]
+    _STATIC_TOKEN = '4fd6sg89d7s6'  # From https://gofile.io/dist/js/config.js
     _TOKEN = None

     def _real_initialize(self):
@@ -60,13 +61,16 @@ def _real_initialize(self):
         self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)

     def _entries(self, file_id):
-        query_params = {'wt': '4fd6sg89d7s6'}  # From https://gofile.io/dist/js/alljs.js
-        password = self.get_param('videopassword')
-        if password:
+        query_params = {}
+        if password := self.get_param('videopassword'):
             query_params['password'] = hashlib.sha256(password.encode()).hexdigest()

         files = self._download_json(
             f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
-            query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})
+            query=query_params, headers={
+                'Authorization': f'Bearer {self._TOKEN}',
+                'X-Website-Token': self._STATIC_TOKEN,
+            })

         status = files['status']
         if status == 'error-passwordRequired':

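Putting the pieces together, the request the extractor now sends looks roughly like this, a sketch with placeholder tokens and file id (the real account token is created in `_real_initialize`):

```python
import hashlib
import urllib.parse
import urllib.request

STATIC_TOKEN = '4fd6sg89d7s6'
account_token = 'hypothetical-account-token'  # placeholder
file_id = 'hypothetical-file-id'              # placeholder

# A password, when given, still travels as a SHA-256 hex digest in the query
query = urllib.parse.urlencode({'password': hashlib.sha256(b'hunter2').hexdigest()})
req = urllib.request.Request(
    f'https://api.gofile.io/contents/{file_id}?{query}',
    headers={
        'Authorization': f'Bearer {account_token}',  # per-account token
        'X-Website-Token': STATIC_TOKEN,  # static token, moved out of the ?wt= query param
    })
print(req.full_url)
```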
yt_dlp/extractor/netzkino.py

@@ -2,84 +2,59 @@
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
     int_or_none,
-    js_to_json,
-    parse_iso8601,
+    url_or_none,
+    urljoin,
 )
+from ..utils.traversal import traverse_obj


 class NetzkinoIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
+    _GEO_COUNTRIES = ['DE']
+    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/details/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
-        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+        'url': 'https://www.netzkino.de/details/snow-beast',
+        'md5': '1a4c90fe40d3ccabce163287e45e56dd',
         'info_dict': {
-            'id': 'rakete-zum-mond',
+            'id': 'snow-beast',
             'ext': 'mp4',
-            'title': 'Rakete zum Mond \u2013 Jules Verne',
-            'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
-            'upload_date': '20120813',
-            'thumbnail': r're:https?://.*\.jpg$',
-            'timestamp': 1344858571,
+            'title': 'Snow Beast',
             'age_limit': 12,
-        },
-        'params': {
-            'skip_download': 'Download only works from Germany',
-        },
-    }, {
-        'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
-        'md5': 'c7728b2dadd04ff6727814847a51ef03',
-        'info_dict': {
-            'id': 'dr-jekyll-mrs-hyde-2',
-            'ext': 'mp4',
-            'title': 'Dr. Jekyll & Mrs. Hyde 2',
-            'description': 'md5:c2e9626ebd02de0a794b95407045d186',
-            'upload_date': '20190130',
-            'thumbnail': r're:https?://.*\.jpg$',
-            'timestamp': 1548849437,
-            'age_limit': 18,
-        },
-        'params': {
-            'skip_download': 'Download only works from Germany',
+            'alt_title': 'Snow Beast',
+            'cast': 'count:3',
+            'categories': 'count:7',
+            'creators': 'count:2',
+            'description': 'md5:e604a954a7f827a80e96a3a97d48b269',
+            'location': 'US',
+            'release_year': 2011,
+            'thumbnail': r're:https?://.+\.jpg',
         },
     }]

     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
-        api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www'
-        info = self._download_json(api_url, video_id)
-        custom_fields = info['custom_fields']
-
-        production_js = self._download_webpage(
-            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
-            note='Downloading player code')
-        avo_js = self._search_regex(
-            r'var urlTemplate=(\{.*?"\})',
-            production_js, 'URL templates')
-        templates = self._parse_json(
-            avo_js, video_id, transform_source=js_to_json)
-
-        suffix = {
-            'hds': '.mp4/manifest.f4m',
-            'hls': '.mp4/master.m3u8',
-            'pmd': '.mp4',
-        }
-        film_fn = custom_fields['Streaming'][0]
-        formats = [{
-            'format_id': key,
-            'ext': 'mp4',
-            'url': tpl.replace('{}', film_fn) + suffix[key],
-        } for key, tpl in templates.items()]
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        next_js_data = self._search_nextjs_data(webpage, video_id)
+
+        query = traverse_obj(next_js_data, (
+            'props', '__dehydratedState', 'queries', ..., 'state',
+            'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
+        if 'DRM' in traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str})):
+            self.report_drm(video_id)

         return {
             'id': video_id,
-            'formats': formats,
-            'title': info['title'],
-            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
-            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
-            'description': clean_html(info.get('content')),
-            'thumbnail': info.get('thumbnail'),
+            **traverse_obj(query, {
+                'title': ('originalTitle', {clean_html}),
+                'age_limit': ('fskRating', {int_or_none}),
+                'alt_title': ('originalTitle', {clean_html}, filter),
+                'cast': ('cast', 'nodes', ..., 'person', 'name', {clean_html}, filter),
+                'creators': (('directors', 'writers'), 'nodes', ..., 'person', 'name', {clean_html}, filter),
+                'categories': ('categories', 'nodes', ..., 'category', 'title', {clean_html}, filter),
+                'description': ('longSynopsis', {clean_html}, filter),
+                'duration': ('runtimeInSeconds', {int_or_none}),
+                'location': ('productionCountry', {clean_html}, filter),
+                'release_year': ('productionYear', {int_or_none}),
+                'thumbnail': ('coverImage', 'masterUrl', {url_or_none}),
+                'url': ('videoSource', 'pmdUrl', {urljoin('https://pmd.netzkino-seite.netzkino.de/')}),
+            }),
         }

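The rework drops the old JSON API and the production.min.js URL-template scraping in favor of the page's Next.js dehydrated state. A toy illustration, with made-up data, of the central `traverse_obj` query: scan all cached queries and keep the first payload whose `__typename` is `CmsMovie`:

```python
from yt_dlp.utils.traversal import traverse_obj

# Made-up miniature of the dehydrated state
next_js_data = {'props': {'__dehydratedState': {'queries': [
    {'state': {'data': {'data': [{'__typename': 'CmsPage'}]}}},
    {'state': {'data': {'data': [
        {'__typename': 'CmsMovie', 'originalTitle': 'Snow Beast'}]}}},
]}}}

movie = traverse_obj(next_js_data, (
    'props', '__dehydratedState', 'queries', ..., 'state',
    'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
print(movie['originalTitle'])  # Snow Beast
```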
yt_dlp/extractor/nextmedia.py (deleted)

@@ -1,238 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
parse_iso8601,
remove_start,
unified_timestamp,
)
class NextMediaIE(InfoExtractor):
IE_DESC = '蘋果日報'
_VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
'md5': 'dff9fad7009311c421176d1ac90bfe4f',
'info_dict': {
'id': '53109199',
'ext': 'mp4',
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
'timestamp': 1415456273,
'upload_date': '20141108',
},
}]
_URL_PATTERN = r'\{ url: \'(.+)\' \}'
def _real_extract(self, url):
news_id = self._match_id(url)
page = self._download_webpage(url, news_id)
return self._extract_from_nextmedia_page(news_id, url, page)
def _extract_from_nextmedia_page(self, news_id, url, page):
redirection_url = self._search_regex(
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
page, 'redirection URL', default=None, group='url')
if redirection_url:
return self.url_result(urllib.parse.urljoin(url, redirection_url))
title = self._fetch_title(page)
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
attrs = {
'id': news_id,
'title': title,
'url': video_url, # ext can be inferred from url
'thumbnail': self._fetch_thumbnail(page),
'description': self._fetch_description(page),
}
timestamp = self._fetch_timestamp(page)
if timestamp:
attrs['timestamp'] = timestamp
else:
attrs['upload_date'] = self._fetch_upload_date(url)
return attrs
def _fetch_title(self, page):
return self._og_search_title(page)
def _fetch_thumbnail(self, page):
return self._og_search_thumbnail(page)
def _fetch_timestamp(self, page):
date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
return parse_iso8601(date_created)
def _fetch_upload_date(self, url):
return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
def _fetch_description(self, page):
return self._og_search_property('description', page)
class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '蘋果日報 - 動新聞'
_VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
_TESTS = [{
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
'info_dict': {
'id': '19009428',
'ext': 'mp4',
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
'timestamp': 1421791200,
'upload_date': '20150120',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
actionnews_page = self._download_webpage(url, news_id)
article_url = self._og_search_url(actionnews_page)
article_page = self._download_webpage(article_url, news_id)
return self._extract_from_nextmedia_page(news_id, url, article_page)
class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '臺灣蘋果日報'
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
'info_dict': {
'id': '36354694',
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
'md5': '86b4e9132d158279c7883822d94ccc49',
'info_dict': {
'id': '550549',
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
'md5': '03df296d95dedc2d5886debbb80cb43f',
'info_dict': {
'id': '5003671',
'ext': 'mp4',
'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
'upload_date': '20150128',
},
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
# No thumbnail
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
'info_dict': {
'id': '5003673',
'ext': 'mp4',
'title': '半夜尿尿 好像會看到___',
'description': 'md5:61d2da7fe117fede148706cdb85ac066',
'upload_date': '20150128',
},
'expected_warnings': [
'video thumbnail',
],
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
'info_dict': {
'id': '35770334',
'ext': 'mp4',
'title': '咖啡占卜測 XU裝熟指數',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
'upload_date': '20140417',
},
}, {
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
'only_matching': True,
}, {
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
'only_matching': True,
}]
_URL_PATTERN = r'\{url: \'(.+)\'\}'
def _fetch_title(self, page):
return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
or self._html_search_meta('description', page, 'news title'))
def _fetch_thumbnail(self, page):
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
def _fetch_timestamp(self, page):
return None
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
class NextTVIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_DESC = '壹電視'
_VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
_TEST = {
'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
'info_dict': {
'id': '11779671',
'ext': 'mp4',
'title': '「超收稅」近4千億 藍議員籲發消費券',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1484825400,
'upload_date': '20170119',
'view_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
data = self._hidden_inputs(webpage)
video_url = data['ntt-vod-src-detailview']
date_str = get_element_by_class('date', webpage)
timestamp = unified_timestamp(date_str + '+0800') if date_str else None
view_count = int_or_none(remove_start(
clean_html(get_element_by_class('click', webpage)), '點閱:'))
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': data.get('ntt-vod-img-src'),
'timestamp': timestamp,
'view_count': view_count,
}

yt_dlp/extractor/parti.py

@@ -6,7 +6,10 @@
 class PartiBaseIE(InfoExtractor):
     def _call_api(self, path, video_id, note=None):
         return self._download_json(
-            f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note)
+            f'https://prod-api.parti.com/parti_v2/profile/{path}', video_id, note, headers={
+                'Origin': 'https://parti.com',
+                'Referer': 'https://parti.com/',
+            })


 class PartiVideoIE(PartiBaseIE):
@@ -20,7 +23,7 @@ class PartiVideoIE(PartiBaseIE):
             'title': 'NOW LIVE ',
             'upload_date': '20250327',
             'categories': ['Gaming'],
-            'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
+            'thumbnail': 'https://media.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
             'channel': 'ItZTMGG',
             'timestamp': 1743044379,
         },
@@ -34,7 +37,7 @@ def _real_extract(self, url):
         return {
             'id': video_id,
             'formats': self._extract_m3u8_formats(
-                urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'),
+                urljoin('https://media.parti.com/', data['livestream_recording']), video_id, 'mp4'),
             **traverse_obj(data, {
                 'title': ('event_title', {str}),
                 'channel': ('user_name', {str}),
@@ -47,32 +50,27 @@ def _real_extract(self, url):

 class PartiLivestreamIE(PartiBaseIE):
     IE_NAME = 'parti:livestream'
-    _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
+    _VALID_URL = r'https?://(?:www\.)?parti\.com/(?!video/)(?P<id>[\w/-]+)'
     _TESTS = [{
-        'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
+        'url': 'https://parti.com/247CryptoTracker',
         'info_dict': {
-            'id': 'Capt_Robs_Adventures',
             'ext': 'mp4',
+            'id': '247CryptoTracker',
+            'description': 'md5:a78051f3d7e66e6a64c6b1eaf59fd364',
             'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
-            'view_count': int,
-            'thumbnail': r're:https://assets\.parti\.com/.+\.png',
-            'timestamp': 1743879776,
-            'upload_date': '20250405',
+            'thumbnail': r're:https://media\.parti\.com/stream-screenshots/.+\.png',
             'live_status': 'is_live',
         },
         'params': {'skip_download': 'm3u8'},
-    }, {
-        'url': 'https://parti.com/creator/discord/sazboxgaming/0',
-        'only_matching': True,
     }]

     def _real_extract(self, url):
-        service, creator_slug = self._match_valid_url(url).group('service', 'id')
+        creator_slug = self._match_id(url)
         encoded_creator_slug = creator_slug.replace('/', '%23')
         creator_id = self._call_api(
-            f'get_user_by_social_media/{service}/{encoded_creator_slug}',
-            creator_slug, note='Fetching user ID')
+            f'user_id_from_name/{encoded_creator_slug}',
+            creator_slug, note='Fetching user ID')['user_id']

         data = self._call_api(
             f'get_livestream_channel_info/{creator_id}', creator_id,
@@ -85,11 +83,7 @@ def _real_extract(self, url):
         return {
             'id': creator_slug,
-            'formats': self._extract_m3u8_formats(
-                channel_info['playback_url'], creator_slug, live=True, query={
-                    'token': channel_info['playback_auth_token'],
-                    'player_version': '1.17.0',
-                }),
+            'formats': self._extract_m3u8_formats(channel_info['playback_url'], creator_slug, live=True),
             'is_live': True,
             **traverse_obj(data, {
                 'title': ('livestream_event_info', 'event_name', {str}),

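Both fixes funnel through `_call_api`, which now targets `prod-api.parti.com` and sends browser-like headers. A rough standalone equivalent of the first lookup (the creator name is a placeholder; `get_livestream_channel_info` follows the same shape):

```python
import urllib.request

creator = '247CryptoTracker'  # placeholder
req = urllib.request.Request(
    f'https://prod-api.parti.com/parti_v2/profile/user_id_from_name/{creator}',
    headers={'Origin': 'https://parti.com', 'Referer': 'https://parti.com/'})
# creator_id = json.load(urllib.request.urlopen(req))['user_id']
print(req.full_url)
```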
yt_dlp/extractor/pornhub.py

@@ -24,6 +24,7 @@
     url_or_none,
     urlencode_postdata,
 )
+from ..utils.traversal import find_elements, traverse_obj


 class PornHubBaseIE(InfoExtractor):
@@ -137,23 +138,24 @@ class PornHubIE(PornHubBaseIE):
     _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
-        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+        'md5': '4d4a4e9178b655776f86cf89ecaf0edf',
         'info_dict': {
             'id': '648719015',
             'ext': 'mp4',
             'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
-            'uploader': 'Babes',
+            'uploader': 'BABES-COM',
+            'uploader_id': '/users/babes-com',
             'upload_date': '20130628',
             'timestamp': 1372447216,
             'duration': 361,
             'view_count': int,
             'like_count': int,
+            'dislike_count': int,
             'comment_count': int,
             'age_limit': 18,
             'tags': list,
             'categories': list,
             'cast': list,
+            'thumbnail': r're:https?://.+',
         },
     }, {
         # non-ASCII title
@@ -480,13 +482,6 @@ def extract_vote_count(kind, name):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

-        def extract_list(meta_key):
-            div = self._search_regex(
-                rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>',
-                webpage, meta_key, default=None)
-            if div:
-                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
-
         info = self._search_json_ld(webpage, video_id, default={})
         # description provided in JSON-LD is irrelevant
         info['description'] = None
@@ -505,9 +500,11 @@ def extract_list(meta_key):
             'comment_count': comment_count,
             'formats': formats,
             'age_limit': 18,
-            'tags': extract_list('tags'),
-            'categories': extract_list('categories'),
-            'cast': extract_list('pornstars'),
+            **traverse_obj(webpage, {
+                'tags': ({find_elements(attr='data-label', value='tag')}, ..., {clean_html}),
+                'categories': ({find_elements(attr='data-label', value='category')}, ..., {clean_html}),
+                'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
+            }),
             'subtitles': subtitles,
         }, info)

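The removed `extract_list` regex helper is replaced by `find_elements` from yt-dlp's traversal utilities, which collects every element carrying a given attribute/value pair. A toy example with made-up markup:

```python
from yt_dlp.utils import clean_html
from yt_dlp.utils.traversal import find_elements, traverse_obj

webpage = '''
<a href="/tag/a" data-label="tag">first tag</a>
<a href="/tag/b" data-label="tag">second tag</a>
<a href="/cat/c" data-label="category">some category</a>
'''
print(traverse_obj(webpage, (
    {find_elements(attr='data-label', value='tag')}, ..., {clean_html})))
# ['first tag', 'second tag']
```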
yt_dlp/extractor/telecinco.py

@@ -6,20 +6,21 @@
 from ..utils import (
     ExtractorError,
     clean_html,
+    extract_attributes,
     int_or_none,
     join_nonempty,
     str_or_none,
-    traverse_obj,
     update_url,
     url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class TelecincoBaseIE(InfoExtractor):
     def _parse_content(self, content, url):
-        video_id = content['dataMediaId']
+        video_id = content['dataMediaId'][1]
         config = self._download_json(
-            content['dataConfig'], video_id, 'Downloading config JSON')
+            content['dataConfig'][1], video_id, 'Downloading config JSON')
         services = config['services']
         caronte = self._download_json(services['caronte'], video_id)
         if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
@@ -57,9 +58,9 @@ def _parse_content(self, content, url):
             'id': video_id,
             'title': traverse_obj(config, ('info', 'title', {str})),
             'formats': formats,
-            'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
+            'thumbnail': (traverse_obj(content, ('dataPoster', 1, {url_or_none}))
                           or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
-            'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
+            'duration': traverse_obj(content, ('dataDuration', 1, {int_or_none})),
             'http_headers': headers,
         }
@@ -137,30 +138,45 @@ class TelecincoIE(TelecincoBaseIE):
         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
         'only_matching': True,
     }]
+    _ASTRO_ISLAND_RE = re.compile(r'<astro-island\b[^>]+>')

     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id, impersonate=True)
-        article = self._search_json(
-            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
-            webpage, 'article', display_id)['article']
-        description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
+        props_list = traverse_obj(webpage, (
+            {self._ASTRO_ISLAND_RE.findall}, ...,
+            {extract_attributes}, 'props', {json.loads}))
+        description = traverse_obj(props_list, (..., 'leadParagraph', 1, {clean_html}, any, filter))
+        main_content = traverse_obj(props_list, (..., ('content', ('articleData', 1, 'opening')), 1, {dict}, any))

-        if article.get('editorialType') != 'VID':
+        if traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)) != 'VID':  # e.g. 'ART'
             entries = []
-            for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
-                content = p['content']
-                type_ = p.get('type')
-                if type_ == 'paragraph' and isinstance(content, str):
+            for p in traverse_obj(props_list, (..., 'articleData', 1, ('opening', ('body', 1, ...)), 1, {dict})):
+                type_ = traverse_obj(p, ('type', 1, {str}))
+                content = traverse_obj(p, ('content', 1, {str} if type_ == 'paragraph' else {dict}))
+                if not content:
+                    continue
+                if type_ == 'paragraph':
                     description = join_nonempty(description, content, delim='')
-                elif type_ == 'video' and isinstance(content, dict):
+                elif type_ == 'video':
                     entries.append(self._parse_content(content, url))
+                else:
+                    self.report_warning(
+                        f'Skipping unsupported content type "{type_}"', display_id, only_once=True)

             return self.playlist_result(
-                entries, str_or_none(article.get('id')),
-                traverse_obj(article, ('title', {str})), clean_html(description))
+                entries,
+                traverse_obj(props_list, (..., 'id', 1, {int}, {str_or_none}, any)) or display_id,
+                traverse_obj(main_content, ('dataTitle', 1, {str})),
+                clean_html(description))

-        info = self._parse_content(article['opening']['content'], url)
+        if not main_content:
+            raise ExtractorError('Unable to extract main content from webpage')
+        info = self._parse_content(main_content, url)
         info['description'] = description
         return info

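The site appears to have moved from a `window.$REACTBASE_STATE` blob to Astro islands: page data now lives in the JSON `props` attribute of each `<astro-island>` tag, and each value seems to be serialized as a [type, value] pair, which is why the new code indexes `[1]` everywhere. A toy illustration with made-up markup:

```python
import json
import re

from yt_dlp.utils import extract_attributes
from yt_dlp.utils.traversal import traverse_obj

webpage = '''<astro-island uid="x" props='{"editorialType": [0, "VID"]}'></astro-island>'''
props_list = traverse_obj(webpage, (
    {re.compile(r'<astro-island\b[^>]+>').findall}, ...,
    {extract_attributes}, 'props', {json.loads}))
print(traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)))  # VID
```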
yt_dlp/extractor/tubitv.py

@@ -15,7 +15,7 @@
 class TubiTvIE(InfoExtractor):
     IE_NAME = 'tubitv'
-    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
     _TESTS = [{
@@ -73,6 +73,9 @@ class TubiTvIE(InfoExtractor):
             'release_year': 1979,
         },
         'skip': 'Content Unavailable',
+    }, {
+        'url': 'https://tubitv.com/es-mx/tv-shows/477363/s01-e03-jacob-dos-dos-y-la-tarjets-de-hockey-robada',
+        'only_matching': True,
     }]

     # DRM formats are included only to raise appropriate error

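A quick standalone check that the widened pattern accepts an optional locale prefix such as `es-mx` (the second slug is shortened from the new test):

```python
import re

# Pattern copied from the diff above
VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
for url in ('https://tubitv.com/tv-shows/477363/some-episode',
            'https://tubitv.com/es-mx/tv-shows/477363/some-episode'):
    print(re.match(VALID_URL, url).group('type', 'id'))  # ('tv-shows', '477363') both times
```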
yt_dlp/extractor/youtube/_tab.py

@@ -382,7 +382,8 @@ def _rich_entries(self, rich_grid_renderer):
                     ('accessibilityText', {lambda x: re.fullmatch(r'(.+), (?:[\d,.]+(?:[KM]| million)?|No) views? - play Short', x)}, 1)), any),
                 'view_count': ('overlayMetadata', 'secondaryText', 'content', {parse_count}),
             }),
-            thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
+            thumbnails=self._extract_thumbnails(
+                renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image'), final_key='sources'))
         return

     def _video_entry(self, video_renderer):
@@ -1585,7 +1586,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'playlist_count': 50,
         'expected_warnings': ['YouTube Music is not directly supported'],
     }, {
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'note': 'unlisted single video playlist',
         'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
         'info_dict': {
@@ -1885,8 +1885,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'playlist_mincount': 30,
     }, {
         # Shorts url result in shorts tab
-        # TODO: Fix channel id extraction
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
         'info_dict': {
             'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1915,7 +1913,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'params': {'extract_flat': True},
     }, {
         # Live video status should be extracted
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
         'info_dict': {
             'id': 'UCQvWX73GQygcwXOTSf_VDVg',

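The fix points `_extract_thumbnails` at a nested `thumbnailViewModel` path instead of a top-level `thumbnail` key. A made-up, heavily trimmed renderer showing the shape the new path expects (structure inferred from the diff, not from YouTube's schema):

```python
from yt_dlp.utils.traversal import traverse_obj

renderer = {'thumbnailViewModel': {'thumbnailViewModel': {'image': {'sources': [
    {'url': 'https://i.ytimg.com/vi/xxx/frame0.jpg', 'width': 405, 'height': 720},
]}}}}
print(traverse_obj(
    renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image', 'sources', ..., 'url')))
# ['https://i.ytimg.com/vi/xxx/frame0.jpg']
```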
yt_dlp/options.py

@@ -1212,7 +1212,7 @@ def _preset_alias_callback(option, opt_str, value, parser):
         help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
     workarounds.add_option(
         '--sleep-subtitles', metavar='SECONDS',
-        dest='sleep_interval_subtitles', default=0, type=int,
+        dest='sleep_interval_subtitles', default=0, type=float,
         help='Number of seconds to sleep before each subtitle download')
     verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
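With `type=float`, optparse now accepts sub-second values such as `--sleep-subtitles 0.5`, which `type=int` rejected with an "invalid integer value" error. A minimal standalone demonstration:

```python
import optparse

parser = optparse.OptionParser()
parser.add_option('--sleep-subtitles', metavar='SECONDS',
                  dest='sleep_interval_subtitles', default=0, type=float)
opts, _ = parser.parse_args(['--sleep-subtitles', '0.5'])
print(opts.sleep_interval_subtitles)  # 0.5
```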