1
0
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-12-18 07:05:41 +07:00

Update to ytdl-2021.01.03

This commit is contained in:
pukkandan
2021-01-01 17:56:37 +05:30
parent c2b5f3114f
commit 29f7c58aaf
96 changed files with 5757 additions and 3771 deletions

View File

@@ -29,9 +29,11 @@
sanitized_Request,
smuggle_url,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
UnsupportedError,
url_or_none,
xpath_attr,
xpath_text,
)
from .commonprotocols import RtmpIE
@@ -48,7 +50,6 @@
from .rutv import RUTVIE
from .tvc import TVCIE
from .sportbox import SportBoxIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
@@ -63,7 +64,10 @@
from .mofosex import MofosexEmbedIE
from .spankwire import SpankwireIE
from .youporn import YouPornIE
from .vimeo import VimeoIE
from .vimeo import (
VimeoIE,
VHXEmbedIE,
)
from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE
@@ -123,6 +127,7 @@
from .gedi import GediEmbedsIE
from .rcs import RCSEmbedsIE
from .bitchute import BitChuteIE
from .arcpublishing import ArcPublishingIE
class GenericIE(InfoExtractor):
@@ -201,11 +206,46 @@ class GenericIE(InfoExtractor):
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
'ext': 'm4v',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'title': 'MSNBC Rachel Maddow (video)',
'description': 're:.*her unique approach to storytelling.*',
},
'playlist': [{
'info_dict': {
'ext': 'mov',
'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
'description': 're:.*her unique approach to storytelling.*',
'upload_date': '20201204',
},
}],
},
# RSS feed with item with description and thumbnails
{
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
'info_dict': {
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
'title': 're:.*100% Hydrogen.*',
'description': 're:.*In this episode.*',
},
'playlist': [{
'info_dict': {
'ext': 'm4a',
'id': 'c1c879525ce2cb640b344507e682c36d',
'title': 're:Hydrogen!',
'description': 're:.*In this episode we are going.*',
'timestamp': 1567977776,
'upload_date': '20190908',
'duration': 459,
'thumbnail': r're:^https?://.*\.jpg$',
'episode_number': 1,
'season_number': 1,
'age_limit': 0,
},
}],
'params': {
'skip_download': True,
},
},
# RSS feed with enclosures and unsupported link URLs
{
@@ -1986,22 +2026,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': [SpringboardPlatformIE.ie_key()],
},
{
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
'info_dict': {
'id': 'uPDB5I9wfp8',
'ext': 'webm',
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
'upload_date': '20160219',
'uploader': 'Pocoyo - Português (BR)',
'uploader_id': 'PocoyoBrazil',
},
'add_ie': [YoutubeIE.ie_key()],
'params': {
'skip_download': True,
},
},
{
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
'info_dict': {
@@ -2106,23 +2130,23 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
'info_dict': {
'id': '5b400b834b32992a310622b9',
'ext': 'mp4',
'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g',
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
'upload_date': '20170909',
'timestamp': 1504915200,
},
'add_ie': [ZypeIE.ie_key()],
'params': {
'skip_download': True,
},
},
# {
# # Zype embed
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
# 'info_dict': {
# 'id': '5b400b834b32992a310622b9',
# 'ext': 'mp4',
# 'title': 'Smoky Barbecue Favorites',
# 'thumbnail': r're:^https?://.*\.jpe?g',
# 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
# 'upload_date': '20170909',
# 'timestamp': 1504915200,
# },
# 'add_ie': [ZypeIE.ie_key()],
# 'params': {
# 'skip_download': True,
# },
# },
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
@@ -2171,7 +2195,32 @@ class GenericIE(InfoExtractor):
# 'params': {
# 'force_generic_extractor': True,
# },
# }
# },
{
# VHX Embed
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
'info_dict': {
'id': '858208',
'ext': 'mp4',
'title': 'Untitled',
'uploader_id': 'user80538407',
'uploader': 'OTT Videos',
},
},
{
# ArcPublishing PoWa video player
'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
'info_dict': {
'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
'ext': 'mp4',
'title': 'Senate candidates wave to voters on Anchorage streets',
'description': 'md5:91f51a6511f090617353dc720318b20e',
'timestamp': 1604378735,
'upload_date': '20201103',
'duration': 1581,
},
},
]
def report_following_redirect(self, new_url):
@@ -2183,6 +2232,10 @@ def _extract_rss(self, url, video_id, doc):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
}
entries = []
for it in doc.findall('./channel/item'):
next_url = None
@@ -2198,10 +2251,33 @@ def _extract_rss(self, url, video_id, doc):
if not next_url:
continue
# Helper for the current RSS item `it`: looks up the child element
# <itunes:KEY> (the `itunes` prefix is resolved through NS_MAP) and returns
# its text via xpath_text. default=None is passed, presumably so that a
# missing tag yields None instead of an error -- the callers below guard
# the result with `or ''` and int_or_none accordingly.
def itunes(key):
return xpath_text(
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
default=None)
duration = itunes('duration')
explicit = (itunes('explicit') or '').lower()
if explicit in ('true', 'yes'):
age_limit = 18
elif explicit in ('false', 'no'):
age_limit = 0
else:
age_limit = None
entries.append({
'_type': 'url_transparent',
'url': next_url,
'title': it.find('title').text,
'description': xpath_text(it, 'description', default=None),
'timestamp': unified_timestamp(
xpath_text(it, 'pubDate', default=None)),
'duration': int_or_none(duration) or parse_duration(duration),
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
'episode': itunes('title'),
'episode_number': int_or_none(itunes('episode')),
'season_number': int_or_none(itunes('season')),
'age_limit': age_limit,
})
return {
@@ -2321,7 +2397,7 @@ def _real_extract(self, url):
info_dict = {
'id': video_id,
'title': self._generic_title(url),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
}
# Check for direct link to a video
@@ -2427,7 +2503,9 @@ def _real_extract(self, url):
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows those cases to be handled in a generic way
webpage = compat_urllib_parse_unquote(webpage)
# FIXME: unescaping the whole page may break URLs, so it is commented out for now.
# There should probably be a second run of the generic extractor on the unescaped webpage.
# webpage = compat_urllib_parse_unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -2509,6 +2587,10 @@ def _real_extract(self, url):
if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
arc_urls = ArcPublishingIE._extract_urls(webpage)
if arc_urls:
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
@@ -2520,6 +2602,10 @@ def _real_extract(self, url):
if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
vhx_url = VHXEmbedIE._extract_url(webpage)
if vhx_url:
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
webpage, 'vid.me embed', default=None)
@@ -2775,11 +2861,6 @@ def _real_extract(self, url):
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage)
if smotri_url:
return self.url_result(smotri_url, 'Smotri')
# Look for embedded Myvi.ru player
myvi_url = MyviIE._extract_url(webpage)
if myvi_url: