mirror of https://github.com/yt-dlp/yt-dlp synced 2025-12-19 07:35:41 +07:00

Compare commits

11 Commits

f0bc71abf6 [ie/tubitv] Support URLs with locales (#15205)
Closes #15176
Authored by: 0xvd
2025-12-19 00:26:53 +00:00

8a4b626daf [ie/dropbox] Support videos in folders (#15313)
Closes #15312
Authored by: 0xvd
2025-12-19 00:24:13 +00:00

f6dc7d5279 Accept float values for --sleep-subtitles (#15282)
Closes #15269
Authored by: 0xvd
2025-12-18 23:42:50 +00:00

c5e55e0479 [ie/gofile] Fix extractor (#15296)
Authored by: quietvoid
2025-12-18 23:42:13 +00:00

6d4984e64e [ie/nextmedia] Remove extractors (#15354)
Authored by: doe1080
2025-12-18 21:36:15 +00:00

a27ec9efc6 [ie/netzkino] Rework extractor (#15351)
Authored by: doe1080
2025-12-18 21:32:54 +00:00

ff61bef041 [ie/youtube:tab] Fix flat thumbnails extraction for shorts (#15331)
Closes #15329
Authored by: bashonly
2025-12-15 22:37:25 +00:00

04f2ec4b97 [ie/parti] Fix extractors (#15319)
Authored by: seproDev
2025-12-13 20:00:56 +01:00

b6f24745bf [ie/telecinco] Fix extractor (#15311)
Closes #15240
Authored by: 0xvd, bashonly
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-12 22:25:45 +00:00

f2ee2a46fc [ie/pornhub] Optimize metadata extraction (#15231)
Closes #14621
Authored by: norepro
2025-12-12 20:52:09 +00:00

5f37f67d37 [ie/archive.org] Fix metadata extraction (#15286)
Closes #15280
Authored by: bashonly
2025-12-09 19:05:12 +00:00
13 changed files with 119 additions and 374 deletions

test/helper.py

@@ -261,7 +261,7 @@ def sanitize(key, value):
 def expect_info_dict(self, got_dict, expected_dict):
     ALLOWED_KEYS_SORT_ORDER = (
         # NB: Keep in sync with the docstring of extractor/common.py
-        'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type', 'ie_key', 'url',
+        'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
         'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified',
         'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count',
         'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap',

yt_dlp/extractor/_extractors.py

@@ -1312,12 +1312,6 @@
 )
 from .newspicks import NewsPicksIE
 from .newsy import NewsyIE
-from .nextmedia import (
-    AppleDailyIE,
-    NextMediaActionNewsIE,
-    NextMediaIE,
-    NextTVIE,
-)
 from .nexx import (
     NexxEmbedIE,
     NexxIE,

yt_dlp/extractor/archiveorg.py

@@ -279,7 +279,7 @@ def _real_extract(self, url):
             'url': 'https://archive.org/' + track['file'].lstrip('/'),
         }

-        metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
+        metadata = self._download_json(f'https://archive.org/metadata/{identifier}', identifier)
         m = metadata['metadata']
         identifier = m['identifier']

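For context, the endpoint the extractor now hits over HTTPS is the public archive.org Metadata API; a minimal standalone sketch (the item identifier here is an arbitrary example, not something from this commit):

```python
import json
import urllib.request

# Fetch item metadata over HTTPS, as the extractor now does.
# 'aboutbanff' is just an example identifier.
with urllib.request.urlopen('https://archive.org/metadata/aboutbanff') as resp:
    metadata = json.load(resp)
print(metadata['metadata']['identifier'])  # aboutbanff
```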
yt_dlp/extractor/dropbox.py

@@ -14,7 +14,7 @@
 class DropboxIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
     _TESTS = [
         {
             'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
@@ -35,6 +35,9 @@ class DropboxIE(InfoExtractor):
         }, {
             'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
             'only_matching': True,
+        }, {
+            'url': 'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/AOfZzSYWUSKle2HD7XF7kzQ/A-BEAT%20C.mp4?rlkey=6tg3jkp4tv6a5vt58a6dag0mm&dl=0',
+            'only_matching': True,
         },
     ]

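The only functional change is `scl/fi` becoming `scl/f[io]`, so `/scl/fo/` folder links now match too. A quick standalone check of the widened pattern (URLs trimmed from the tests above):

```python
import re

# Pattern copied from the diff above
VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
for url in ('https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4',
            'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/A-BEAT.mp4'):
    print(re.match(VALID_URL, url).group('id'))  # file id, then folder id
```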
yt_dlp/extractor/gofile.py

@@ -46,6 +46,7 @@ class GofileIE(InfoExtractor):
             'videopassword': 'password',
         },
     }]
+    _STATIC_TOKEN = '4fd6sg89d7s6'  # From https://gofile.io/dist/js/config.js
     _TOKEN = None

     def _real_initialize(self):
@@ -60,13 +61,16 @@ def _real_initialize(self):
         self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)

     def _entries(self, file_id):
-        query_params = {'wt': '4fd6sg89d7s6'}  # From https://gofile.io/dist/js/alljs.js
-        password = self.get_param('videopassword')
-        if password:
+        query_params = {}
+        if password := self.get_param('videopassword'):
             query_params['password'] = hashlib.sha256(password.encode()).hexdigest()

         files = self._download_json(
             f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
-            query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})
+            query=query_params, headers={
+                'Authorization': f'Bearer {self._TOKEN}',
+                'X-Website-Token': self._STATIC_TOKEN,
+            })

         status = files['status']
         if status == 'error-passwordRequired':

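Putting the pieces together, the request the extractor now sends looks roughly like this, a sketch with placeholder tokens and file id (the real account token is created in `_real_initialize`):

```python
import hashlib
import urllib.parse
import urllib.request

STATIC_TOKEN = '4fd6sg89d7s6'
account_token = 'hypothetical-account-token'  # placeholder
file_id = 'hypothetical-file-id'              # placeholder

# A password, when given, still travels as a SHA-256 hex digest in the query
query = urllib.parse.urlencode({'password': hashlib.sha256(b'hunter2').hexdigest()})
req = urllib.request.Request(
    f'https://api.gofile.io/contents/{file_id}?{query}',
    headers={
        'Authorization': f'Bearer {account_token}',  # per-account token
        'X-Website-Token': STATIC_TOKEN,  # static token, moved out of the ?wt= query param
    })
print(req.full_url)
```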
yt_dlp/extractor/netzkino.py

@@ -2,84 +2,59 @@
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
     int_or_none,
-    js_to_json,
-    parse_iso8601,
+    url_or_none,
+    urljoin,
 )
+from ..utils.traversal import traverse_obj


 class NetzkinoIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
+    _GEO_COUNTRIES = ['DE']
+    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/details/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
-        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+        'url': 'https://www.netzkino.de/details/snow-beast',
+        'md5': '1a4c90fe40d3ccabce163287e45e56dd',
         'info_dict': {
-            'id': 'rakete-zum-mond',
+            'id': 'snow-beast',
             'ext': 'mp4',
-            'title': 'Rakete zum Mond \u2013 Jules Verne',
-            'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
-            'upload_date': '20120813',
-            'thumbnail': r're:https?://.*\.jpg$',
-            'timestamp': 1344858571,
+            'title': 'Snow Beast',
             'age_limit': 12,
-        },
-        'params': {
-            'skip_download': 'Download only works from Germany',
-        },
-    }, {
-        'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
-        'md5': 'c7728b2dadd04ff6727814847a51ef03',
-        'info_dict': {
-            'id': 'dr-jekyll-mrs-hyde-2',
-            'ext': 'mp4',
-            'title': 'Dr. Jekyll & Mrs. Hyde 2',
-            'description': 'md5:c2e9626ebd02de0a794b95407045d186',
-            'upload_date': '20190130',
-            'thumbnail': r're:https?://.*\.jpg$',
-            'timestamp': 1548849437,
-            'age_limit': 18,
-        },
-        'params': {
-            'skip_download': 'Download only works from Germany',
+            'alt_title': 'Snow Beast',
+            'cast': 'count:3',
+            'categories': 'count:7',
+            'creators': 'count:2',
+            'description': 'md5:e604a954a7f827a80e96a3a97d48b269',
+            'location': 'US',
+            'release_year': 2011,
+            'thumbnail': r're:https?://.+\.jpg',
         },
     }]

     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
-        api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www'
-        info = self._download_json(api_url, video_id)
-        custom_fields = info['custom_fields']
-
-        production_js = self._download_webpage(
-            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
-            note='Downloading player code')
-        avo_js = self._search_regex(
-            r'var urlTemplate=(\{.*?"\})',
-            production_js, 'URL templates')
-        templates = self._parse_json(
-            avo_js, video_id, transform_source=js_to_json)
-
-        suffix = {
-            'hds': '.mp4/manifest.f4m',
-            'hls': '.mp4/master.m3u8',
-            'pmd': '.mp4',
-        }
-        film_fn = custom_fields['Streaming'][0]
-        formats = [{
-            'format_id': key,
-            'ext': 'mp4',
-            'url': tpl.replace('{}', film_fn) + suffix[key],
-        } for key, tpl in templates.items()]
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        next_js_data = self._search_nextjs_data(webpage, video_id)
+
+        query = traverse_obj(next_js_data, (
+            'props', '__dehydratedState', 'queries', ..., 'state',
+            'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
+        if 'DRM' in traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str})):
+            self.report_drm(video_id)

         return {
             'id': video_id,
-            'formats': formats,
-            'title': info['title'],
-            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
-            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
-            'description': clean_html(info.get('content')),
-            'thumbnail': info.get('thumbnail'),
+            **traverse_obj(query, {
+                'title': ('originalTitle', {clean_html}),
+                'age_limit': ('fskRating', {int_or_none}),
+                'alt_title': ('originalTitle', {clean_html}, filter),
+                'cast': ('cast', 'nodes', ..., 'person', 'name', {clean_html}, filter),
+                'creators': (('directors', 'writers'), 'nodes', ..., 'person', 'name', {clean_html}, filter),
+                'categories': ('categories', 'nodes', ..., 'category', 'title', {clean_html}, filter),
+                'description': ('longSynopsis', {clean_html}, filter),
+                'duration': ('runtimeInSeconds', {int_or_none}),
+                'location': ('productionCountry', {clean_html}, filter),
+                'release_year': ('productionYear', {int_or_none}),
+                'thumbnail': ('coverImage', 'masterUrl', {url_or_none}),
+                'url': ('videoSource', 'pmdUrl', {urljoin('https://pmd.netzkino-seite.netzkino.de/')}),
+            }),
         }

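The rework drops the old JSON API and the production.min.js URL-template scraping in favor of the page's Next.js dehydrated state. A toy illustration, with made-up data, of the central `traverse_obj` query: scan all cached queries and keep the first payload whose `__typename` is `CmsMovie`:

```python
from yt_dlp.utils.traversal import traverse_obj

# Made-up miniature of the dehydrated state
next_js_data = {'props': {'__dehydratedState': {'queries': [
    {'state': {'data': {'data': [{'__typename': 'CmsPage'}]}}},
    {'state': {'data': {'data': [
        {'__typename': 'CmsMovie', 'originalTitle': 'Snow Beast'}]}}},
]}}}

movie = traverse_obj(next_js_data, (
    'props', '__dehydratedState', 'queries', ..., 'state',
    'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
print(movie['originalTitle'])  # Snow Beast
```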
yt_dlp/extractor/nextmedia.py (deleted)

@@ -1,238 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
parse_iso8601,
remove_start,
unified_timestamp,
)
class NextMediaIE(InfoExtractor):
IE_DESC = '蘋果日報'
_VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
'md5': 'dff9fad7009311c421176d1ac90bfe4f',
'info_dict': {
'id': '53109199',
'ext': 'mp4',
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
'timestamp': 1415456273,
'upload_date': '20141108',
},
}]
_URL_PATTERN = r'\{ url: \'(.+)\' \}'
def _real_extract(self, url):
news_id = self._match_id(url)
page = self._download_webpage(url, news_id)
return self._extract_from_nextmedia_page(news_id, url, page)
def _extract_from_nextmedia_page(self, news_id, url, page):
redirection_url = self._search_regex(
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
page, 'redirection URL', default=None, group='url')
if redirection_url:
return self.url_result(urllib.parse.urljoin(url, redirection_url))
title = self._fetch_title(page)
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
attrs = {
'id': news_id,
'title': title,
'url': video_url, # ext can be inferred from url
'thumbnail': self._fetch_thumbnail(page),
'description': self._fetch_description(page),
}
timestamp = self._fetch_timestamp(page)
if timestamp:
attrs['timestamp'] = timestamp
else:
attrs['upload_date'] = self._fetch_upload_date(url)
return attrs
def _fetch_title(self, page):
return self._og_search_title(page)
def _fetch_thumbnail(self, page):
return self._og_search_thumbnail(page)
def _fetch_timestamp(self, page):
date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
return parse_iso8601(date_created)
def _fetch_upload_date(self, url):
return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
def _fetch_description(self, page):
return self._og_search_property('description', page)
class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '蘋果日報 - 動新聞'
_VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
_TESTS = [{
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
'info_dict': {
'id': '19009428',
'ext': 'mp4',
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
'timestamp': 1421791200,
'upload_date': '20150120',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
actionnews_page = self._download_webpage(url, news_id)
article_url = self._og_search_url(actionnews_page)
article_page = self._download_webpage(article_url, news_id)
return self._extract_from_nextmedia_page(news_id, url, article_page)
class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '臺灣蘋果日報'
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
'info_dict': {
'id': '36354694',
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
'md5': '86b4e9132d158279c7883822d94ccc49',
'info_dict': {
'id': '550549',
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
'md5': '03df296d95dedc2d5886debbb80cb43f',
'info_dict': {
'id': '5003671',
'ext': 'mp4',
'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
'upload_date': '20150128',
},
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
# No thumbnail
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
'info_dict': {
'id': '5003673',
'ext': 'mp4',
'title': '半夜尿尿 好像會看到___',
'description': 'md5:61d2da7fe117fede148706cdb85ac066',
'upload_date': '20150128',
},
'expected_warnings': [
'video thumbnail',
],
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
'info_dict': {
'id': '35770334',
'ext': 'mp4',
'title': '咖啡占卜測 XU裝熟指數',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
'upload_date': '20140417',
},
}, {
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
'only_matching': True,
}, {
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
'only_matching': True,
}]
_URL_PATTERN = r'\{url: \'(.+)\'\}'
def _fetch_title(self, page):
return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
or self._html_search_meta('description', page, 'news title'))
def _fetch_thumbnail(self, page):
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
def _fetch_timestamp(self, page):
return None
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
class NextTVIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_DESC = '壹電視'
_VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
_TEST = {
'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
'info_dict': {
'id': '11779671',
'ext': 'mp4',
'title': '「超收稅」近4千億 藍議員籲發消費券',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1484825400,
'upload_date': '20170119',
'view_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
data = self._hidden_inputs(webpage)
video_url = data['ntt-vod-src-detailview']
date_str = get_element_by_class('date', webpage)
timestamp = unified_timestamp(date_str + '+0800') if date_str else None
view_count = int_or_none(remove_start(
clean_html(get_element_by_class('click', webpage)), '點閱:'))
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': data.get('ntt-vod-img-src'),
'timestamp': timestamp,
'view_count': view_count,
}

yt_dlp/extractor/parti.py

@@ -6,7 +6,10 @@
 class PartiBaseIE(InfoExtractor):
     def _call_api(self, path, video_id, note=None):
         return self._download_json(
-            f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note)
+            f'https://prod-api.parti.com/parti_v2/profile/{path}', video_id, note, headers={
+                'Origin': 'https://parti.com',
+                'Referer': 'https://parti.com/',
+            })


 class PartiVideoIE(PartiBaseIE):
@@ -20,7 +23,7 @@ class PartiVideoIE(PartiBaseIE):
             'title': 'NOW LIVE ',
             'upload_date': '20250327',
             'categories': ['Gaming'],
-            'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
+            'thumbnail': 'https://media.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
             'channel': 'ItZTMGG',
             'timestamp': 1743044379,
         },
@@ -34,7 +37,7 @@ def _real_extract(self, url):
         return {
             'id': video_id,
             'formats': self._extract_m3u8_formats(
-                urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'),
+                urljoin('https://media.parti.com/', data['livestream_recording']), video_id, 'mp4'),
             **traverse_obj(data, {
                 'title': ('event_title', {str}),
                 'channel': ('user_name', {str}),
@@ -47,32 +50,27 @@ def _real_extract(self, url):

 class PartiLivestreamIE(PartiBaseIE):
     IE_NAME = 'parti:livestream'
-    _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
+    _VALID_URL = r'https?://(?:www\.)?parti\.com/(?!video/)(?P<id>[\w/-]+)'
     _TESTS = [{
-        'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
+        'url': 'https://parti.com/247CryptoTracker',
         'info_dict': {
-            'id': 'Capt_Robs_Adventures',
             'ext': 'mp4',
+            'id': '247CryptoTracker',
+            'description': 'md5:a78051f3d7e66e6a64c6b1eaf59fd364',
             'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
-            'view_count': int,
-            'thumbnail': r're:https://assets\.parti\.com/.+\.png',
-            'timestamp': 1743879776,
-            'upload_date': '20250405',
+            'thumbnail': r're:https://media\.parti\.com/stream-screenshots/.+\.png',
             'live_status': 'is_live',
         },
         'params': {'skip_download': 'm3u8'},
-    }, {
-        'url': 'https://parti.com/creator/discord/sazboxgaming/0',
-        'only_matching': True,
     }]

     def _real_extract(self, url):
-        service, creator_slug = self._match_valid_url(url).group('service', 'id')
+        creator_slug = self._match_id(url)
         encoded_creator_slug = creator_slug.replace('/', '%23')
         creator_id = self._call_api(
-            f'get_user_by_social_media/{service}/{encoded_creator_slug}',
-            creator_slug, note='Fetching user ID')
+            f'user_id_from_name/{encoded_creator_slug}',
+            creator_slug, note='Fetching user ID')['user_id']

         data = self._call_api(
             f'get_livestream_channel_info/{creator_id}', creator_id,
@@ -85,11 +83,7 @@ def _real_extract(self, url):
         return {
             'id': creator_slug,
-            'formats': self._extract_m3u8_formats(
-                channel_info['playback_url'], creator_slug, live=True, query={
-                    'token': channel_info['playback_auth_token'],
-                    'player_version': '1.17.0',
-                }),
+            'formats': self._extract_m3u8_formats(channel_info['playback_url'], creator_slug, live=True),
             'is_live': True,
             **traverse_obj(data, {
                 'title': ('livestream_event_info', 'event_name', {str}),

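Both fixes funnel through `_call_api`, which now targets `prod-api.parti.com` and sends browser-like headers. A rough standalone equivalent of the first lookup (the creator name is a placeholder; `get_livestream_channel_info` follows the same shape):

```python
import urllib.request

creator = '247CryptoTracker'  # placeholder
req = urllib.request.Request(
    f'https://prod-api.parti.com/parti_v2/profile/user_id_from_name/{creator}',
    headers={'Origin': 'https://parti.com', 'Referer': 'https://parti.com/'})
# creator_id = json.load(urllib.request.urlopen(req))['user_id']
print(req.full_url)
```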
yt_dlp/extractor/pornhub.py

@@ -24,6 +24,7 @@
     url_or_none,
     urlencode_postdata,
 )
+from ..utils.traversal import find_elements, traverse_obj


 class PornHubBaseIE(InfoExtractor):
@@ -137,23 +138,24 @@ class PornHubIE(PornHubBaseIE):
     _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
-        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+        'md5': '4d4a4e9178b655776f86cf89ecaf0edf',
         'info_dict': {
             'id': '648719015',
             'ext': 'mp4',
             'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
-            'uploader': 'Babes',
+            'uploader': 'BABES-COM',
+            'uploader_id': '/users/babes-com',
             'upload_date': '20130628',
             'timestamp': 1372447216,
             'duration': 361,
             'view_count': int,
             'like_count': int,
+            'dislike_count': int,
             'comment_count': int,
             'age_limit': 18,
             'tags': list,
             'categories': list,
             'cast': list,
+            'thumbnail': r're:https?://.+',
         },
     }, {
         # non-ASCII title
@@ -480,13 +482,6 @@ def extract_vote_count(kind, name):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

-        def extract_list(meta_key):
-            div = self._search_regex(
-                rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>',
-                webpage, meta_key, default=None)
-            if div:
-                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
-
         info = self._search_json_ld(webpage, video_id, default={})
         # description provided in JSON-LD is irrelevant
         info['description'] = None
@@ -505,9 +500,11 @@ def extract_list(meta_key):
             'comment_count': comment_count,
             'formats': formats,
             'age_limit': 18,
-            'tags': extract_list('tags'),
-            'categories': extract_list('categories'),
-            'cast': extract_list('pornstars'),
+            **traverse_obj(webpage, {
+                'tags': ({find_elements(attr='data-label', value='tag')}, ..., {clean_html}),
+                'categories': ({find_elements(attr='data-label', value='category')}, ..., {clean_html}),
+                'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
+            }),
             'subtitles': subtitles,
         }, info)

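The removed `extract_list` regex helper is replaced by `find_elements` from yt-dlp's traversal utilities, which collects every element carrying a given attribute/value pair. A toy example with made-up markup:

```python
from yt_dlp.utils import clean_html
from yt_dlp.utils.traversal import find_elements, traverse_obj

webpage = '''
<a href="/tag/a" data-label="tag">first tag</a>
<a href="/tag/b" data-label="tag">second tag</a>
<a href="/cat/c" data-label="category">some category</a>
'''
print(traverse_obj(webpage, (
    {find_elements(attr='data-label', value='tag')}, ..., {clean_html})))
# ['first tag', 'second tag']
```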
yt_dlp/extractor/telecinco.py

@@ -6,20 +6,21 @@
 from ..utils import (
     ExtractorError,
     clean_html,
+    extract_attributes,
     int_or_none,
     join_nonempty,
     str_or_none,
-    traverse_obj,
     update_url,
     url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class TelecincoBaseIE(InfoExtractor):
     def _parse_content(self, content, url):
-        video_id = content['dataMediaId']
+        video_id = content['dataMediaId'][1]
         config = self._download_json(
-            content['dataConfig'], video_id, 'Downloading config JSON')
+            content['dataConfig'][1], video_id, 'Downloading config JSON')
         services = config['services']
         caronte = self._download_json(services['caronte'], video_id)
         if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
@@ -57,9 +58,9 @@ def _parse_content(self, content, url):
             'id': video_id,
             'title': traverse_obj(config, ('info', 'title', {str})),
             'formats': formats,
-            'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
+            'thumbnail': (traverse_obj(content, ('dataPoster', 1, {url_or_none}))
                           or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
-            'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
+            'duration': traverse_obj(content, ('dataDuration', 1, {int_or_none})),
             'http_headers': headers,
         }
@@ -137,30 +138,45 @@ class TelecincoIE(TelecincoBaseIE):
         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
         'only_matching': True,
     }]
+    _ASTRO_ISLAND_RE = re.compile(r'<astro-island\b[^>]+>')

     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id, impersonate=True)
-        article = self._search_json(
-            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
-            webpage, 'article', display_id)['article']
-        description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
+        props_list = traverse_obj(webpage, (
+            {self._ASTRO_ISLAND_RE.findall}, ...,
+            {extract_attributes}, 'props', {json.loads}))
+        description = traverse_obj(props_list, (..., 'leadParagraph', 1, {clean_html}, any, filter))
+        main_content = traverse_obj(props_list, (..., ('content', ('articleData', 1, 'opening')), 1, {dict}, any))

-        if article.get('editorialType') != 'VID':
+        if traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)) != 'VID':  # e.g. 'ART'
             entries = []
-            for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
-                content = p['content']
-                type_ = p.get('type')
-                if type_ == 'paragraph' and isinstance(content, str):
+            for p in traverse_obj(props_list, (..., 'articleData', 1, ('opening', ('body', 1, ...)), 1, {dict})):
+                type_ = traverse_obj(p, ('type', 1, {str}))
+                content = traverse_obj(p, ('content', 1, {str} if type_ == 'paragraph' else {dict}))
+                if not content:
+                    continue
+                if type_ == 'paragraph':
                     description = join_nonempty(description, content, delim='')
-                elif type_ == 'video' and isinstance(content, dict):
+                elif type_ == 'video':
                     entries.append(self._parse_content(content, url))
+                else:
+                    self.report_warning(
+                        f'Skipping unsupported content type "{type_}"', display_id, only_once=True)

             return self.playlist_result(
-                entries, str_or_none(article.get('id')),
-                traverse_obj(article, ('title', {str})), clean_html(description))
+                entries,
+                traverse_obj(props_list, (..., 'id', 1, {int}, {str_or_none}, any)) or display_id,
+                traverse_obj(main_content, ('dataTitle', 1, {str})),
+                clean_html(description))

-        info = self._parse_content(article['opening']['content'], url)
+        if not main_content:
+            raise ExtractorError('Unable to extract main content from webpage')
+        info = self._parse_content(main_content, url)
         info['description'] = description
         return info

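The site appears to have moved from a `window.$REACTBASE_STATE` blob to Astro islands: page data now lives in the JSON `props` attribute of each `<astro-island>` tag, and each value seems to be serialized as a [type, value] pair, which is why the new code indexes `[1]` everywhere. A toy illustration with made-up markup:

```python
import json
import re

from yt_dlp.utils import extract_attributes
from yt_dlp.utils.traversal import traverse_obj

webpage = '''<astro-island uid="x" props='{"editorialType": [0, "VID"]}'></astro-island>'''
props_list = traverse_obj(webpage, (
    {re.compile(r'<astro-island\b[^>]+>').findall}, ...,
    {extract_attributes}, 'props', {json.loads}))
print(traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)))  # VID
```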
yt_dlp/extractor/tubitv.py

@@ -15,7 +15,7 @@
 class TubiTvIE(InfoExtractor):
     IE_NAME = 'tubitv'
-    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
     _TESTS = [{
@@ -73,6 +73,9 @@ class TubiTvIE(InfoExtractor):
             'release_year': 1979,
         },
         'skip': 'Content Unavailable',
+    }, {
+        'url': 'https://tubitv.com/es-mx/tv-shows/477363/s01-e03-jacob-dos-dos-y-la-tarjets-de-hockey-robada',
+        'only_matching': True,
     }]

     # DRM formats are included only to raise appropriate error

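A quick standalone check that the widened pattern accepts an optional locale prefix such as `es-mx` (the second slug is shortened from the new test):

```python
import re

# Pattern copied from the diff above
VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
for url in ('https://tubitv.com/tv-shows/477363/some-episode',
            'https://tubitv.com/es-mx/tv-shows/477363/some-episode'):
    print(re.match(VALID_URL, url).group('type', 'id'))  # ('tv-shows', '477363') both times
```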
yt_dlp/extractor/youtube/_tab.py

@@ -382,7 +382,8 @@ def _rich_entries(self, rich_grid_renderer):
                     ('accessibilityText', {lambda x: re.fullmatch(r'(.+), (?:[\d,.]+(?:[KM]| million)?|No) views? - play Short', x)}, 1)), any),
                 'view_count': ('overlayMetadata', 'secondaryText', 'content', {parse_count}),
             }),
-            thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
+            thumbnails=self._extract_thumbnails(
+                renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image'), final_key='sources'))
         return

     def _video_entry(self, video_renderer):
@@ -1585,7 +1586,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'playlist_count': 50,
         'expected_warnings': ['YouTube Music is not directly supported'],
     }, {
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'note': 'unlisted single video playlist',
         'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
         'info_dict': {
@@ -1885,8 +1885,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'playlist_mincount': 30,
     }, {
         # Shorts url result in shorts tab
-        # TODO: Fix channel id extraction
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
         'info_dict': {
             'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1915,7 +1913,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'params': {'extract_flat': True},
     }, {
         # Live video status should be extracted
-        # TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
         'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
         'info_dict': {
             'id': 'UCQvWX73GQygcwXOTSf_VDVg',

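The fix points `_extract_thumbnails` at a nested `thumbnailViewModel` path instead of a top-level `thumbnail` key. A made-up, heavily trimmed renderer showing the shape the new path expects (structure inferred from the diff, not from YouTube's schema):

```python
from yt_dlp.utils.traversal import traverse_obj

renderer = {'thumbnailViewModel': {'thumbnailViewModel': {'image': {'sources': [
    {'url': 'https://i.ytimg.com/vi/xxx/frame0.jpg', 'width': 405, 'height': 720},
]}}}}
print(traverse_obj(
    renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image', 'sources', ..., 'url')))
# ['https://i.ytimg.com/vi/xxx/frame0.jpg']
```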
yt_dlp/options.py

@@ -1212,7 +1212,7 @@ def _preset_alias_callback(option, opt_str, value, parser):
         help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
     workarounds.add_option(
         '--sleep-subtitles', metavar='SECONDS',
-        dest='sleep_interval_subtitles', default=0, type=int,
+        dest='sleep_interval_subtitles', default=0, type=float,
         help='Number of seconds to sleep before each subtitle download')
     verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
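With `type=float`, optparse now accepts sub-second values such as `--sleep-subtitles 0.5`, which `type=int` rejected with an "invalid integer value" error. A minimal standalone demonstration:

```python
import optparse

parser = optparse.OptionParser()
parser.add_option('--sleep-subtitles', metavar='SECONDS',
                  dest='sleep_interval_subtitles', default=0, type=float)
opts, _ = parser.parse_args(['--sleep-subtitles', '0.5'])
print(opts.sleep_interval_subtitles)  # 0.5
```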