mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-12-16 22:25:40 +07:00
Compare commits
5 Commits
af285016d2
...
854fded114
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
854fded114 | ||
|
|
5f66ac71f6 | ||
|
|
4cb5e191ef | ||
|
|
6ee6a6fc58 | ||
|
|
23f1ab3469 |
@@ -755,6 +755,17 @@ def test_partial_read_then_full_read(self, handler):
|
|||||||
assert res.read(0) == b''
|
assert res.read(0) == b''
|
||||||
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
|
def test_partial_read_greater_than_response_then_full_read(self, handler):
|
||||||
|
with handler() as rh:
|
||||||
|
for encoding in ('', 'gzip', 'deflate'):
|
||||||
|
res = validate_and_send(rh, Request(
|
||||||
|
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||||
|
headers={'ytdl-encoding': encoding}))
|
||||||
|
assert res.headers.get('Content-Encoding') == encoding
|
||||||
|
assert res.read(512) == b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
assert res.read(0) == b''
|
||||||
|
assert res.read() == b''
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||||
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
@pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
|
||||||
@@ -920,6 +931,28 @@ def test_http_response_auto_close(self, handler):
|
|||||||
assert res.fp.fp is None
|
assert res.fp.fp is None
|
||||||
assert res.closed
|
assert res.closed
|
||||||
|
|
||||||
|
def test_data_uri_partial_read_then_full_read(self, handler):
|
||||||
|
with handler() as rh:
|
||||||
|
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||||
|
assert res.read(6) == b'hello '
|
||||||
|
assert res.read(0) == b''
|
||||||
|
assert res.read() == b'world'
|
||||||
|
# Should automatically close the underlying file object
|
||||||
|
assert res.fp.closed
|
||||||
|
assert res.closed
|
||||||
|
|
||||||
|
def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
|
||||||
|
with handler() as rh:
|
||||||
|
res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
|
||||||
|
assert res.read(512) == b'hello world'
|
||||||
|
# Response and its underlying file object should already be closed now
|
||||||
|
assert res.fp.closed
|
||||||
|
assert res.closed
|
||||||
|
assert res.read(0) == b''
|
||||||
|
assert res.read() == b''
|
||||||
|
assert res.fp.closed
|
||||||
|
assert res.closed
|
||||||
|
|
||||||
def test_http_error_returns_content(self, handler):
|
def test_http_error_returns_content(self, handler):
|
||||||
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
||||||
def get_response():
|
def get_response():
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
pytestmark = pytest.mark.handler_flaky(
|
pytestmark = pytest.mark.handler_flaky(
|
||||||
'Websockets',
|
'Websockets',
|
||||||
os.name != 'nt' and sys.implementation.name == 'pypy',
|
os.name == 'nt' or sys.implementation.name == 'pypy',
|
||||||
reason='segfaults',
|
reason='segfaults',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -461,7 +461,8 @@ def download(self, filename, info_dict, subtitle=False):
|
|||||||
min_sleep_interval = self.params.get('sleep_interval') or 0
|
min_sleep_interval = self.params.get('sleep_interval') or 0
|
||||||
max_sleep_interval = self.params.get('max_sleep_interval') or 0
|
max_sleep_interval = self.params.get('max_sleep_interval') or 0
|
||||||
|
|
||||||
if available_at := info_dict.get('available_at'):
|
requested_formats = info_dict.get('requested_formats') or [info_dict]
|
||||||
|
if available_at := max(f.get('available_at') or 0 for f in requested_formats):
|
||||||
forced_sleep_interval = available_at - int(time.time())
|
forced_sleep_interval = available_at - int(time.time())
|
||||||
if forced_sleep_interval > min_sleep_interval:
|
if forced_sleep_interval > min_sleep_interval:
|
||||||
sleep_note = 'as required by the site'
|
sleep_note = 'as required by the site'
|
||||||
|
|||||||
@@ -691,6 +691,10 @@
|
|||||||
FrontendMastersIE,
|
FrontendMastersIE,
|
||||||
FrontendMastersLessonIE,
|
FrontendMastersLessonIE,
|
||||||
)
|
)
|
||||||
|
from .frontro import (
|
||||||
|
TheChosenGroupIE,
|
||||||
|
TheChosenIE,
|
||||||
|
)
|
||||||
from .fujitv import FujiTVFODPlus7IE
|
from .fujitv import FujiTVFODPlus7IE
|
||||||
from .funk import FunkIE
|
from .funk import FunkIE
|
||||||
from .funker530 import Funker530IE
|
from .funker530 import Funker530IE
|
||||||
@@ -1094,7 +1098,10 @@
|
|||||||
from .massengeschmacktv import MassengeschmackTVIE
|
from .massengeschmacktv import MassengeschmackTVIE
|
||||||
from .masters import MastersIE
|
from .masters import MastersIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mave import MaveIE
|
from .mave import (
|
||||||
|
MaveChannelIE,
|
||||||
|
MaveIE,
|
||||||
|
)
|
||||||
from .mbn import MBNIE
|
from .mbn import MBNIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .medaltv import MedalTVIE
|
from .medaltv import MedalTVIE
|
||||||
|
|||||||
164
yt_dlp/extractor/frontro.py
Normal file
164
yt_dlp/extractor/frontro.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none, parse_iso8601, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class FrontoBaseIE(InfoExtractor):
|
||||||
|
def _get_auth_headers(self, url):
|
||||||
|
return traverse_obj(self._get_cookies(url), {
|
||||||
|
'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class FrontroVideoBaseIE(FrontoBaseIE):
|
||||||
|
_CHANNEL_ID = None
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
metadata = self._download_json(
|
||||||
|
'https://api.frontrow.cc/query', video_id, data=json.dumps({
|
||||||
|
'operationName': 'Video',
|
||||||
|
'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
|
||||||
|
'query': '''query Video($channelID: ID!, $videoID: ID!) {
|
||||||
|
video(ChannelID: $channelID, VideoID: $videoID) {
|
||||||
|
... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
|
||||||
|
}
|
||||||
|
}''',
|
||||||
|
}).encode(), headers={
|
||||||
|
'content-type': 'application/json',
|
||||||
|
**self._get_auth_headers(url),
|
||||||
|
})['data']['video']
|
||||||
|
if not traverse_obj(metadata, 'hasAccess'):
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
**traverse_obj(metadata, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('thumbnail', {url_or_none}),
|
||||||
|
'timestamp': ('createdAt', {parse_iso8601}),
|
||||||
|
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'like_count': ('likeCount', {int_or_none}),
|
||||||
|
'comment_count': ('comments', {int_or_none}),
|
||||||
|
'view_count': ('views', {int_or_none}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class FrontroGroupBaseIE(FrontoBaseIE):
|
||||||
|
_CHANNEL_ID = None
|
||||||
|
_VIDEO_EXTRACTOR = None
|
||||||
|
_VIDEO_URL_TMPL = None
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
group_id = self._match_id(url)
|
||||||
|
|
||||||
|
metadata = self._download_json(
|
||||||
|
'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
|
||||||
|
data=json.dumps({
|
||||||
|
'operationName': 'PaginatedStaticPageContainer',
|
||||||
|
'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
|
||||||
|
'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
|
||||||
|
pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
|
||||||
|
... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
|
||||||
|
id contentItem { ... on ItemVideo { videoItem: item {
|
||||||
|
id
|
||||||
|
}}}
|
||||||
|
}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''',
|
||||||
|
}).encode(), headers={
|
||||||
|
'content-type': 'application/json',
|
||||||
|
**self._get_auth_headers(url),
|
||||||
|
})['data']['pageContainer']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for video_id in traverse_obj(metadata, (
|
||||||
|
'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}),
|
||||||
|
):
|
||||||
|
entries.append(self.url_result(
|
||||||
|
self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': group_id,
|
||||||
|
'entries': entries,
|
||||||
|
**traverse_obj(metadata, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'timestamp': ('createdAt', {parse_iso8601}),
|
||||||
|
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TheChosenIE(FrontroVideoBaseIE):
|
||||||
|
_CHANNEL_ID = '12884901895'
|
||||||
|
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.thechosen.tv/video/184683594325',
|
||||||
|
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '184683594325',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Season 3 Episode 2: Two by Two',
|
||||||
|
'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'duration': 4212,
|
||||||
|
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
|
||||||
|
'timestamp': 1698954546,
|
||||||
|
'upload_date': '20231102',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'modified_date': str,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://watch.thechosen.tv/video/184683596189',
|
||||||
|
'md5': 'd581562f9d29ce82f5b7770415334151',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '184683596189',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Season 4 Episode 8: Humble',
|
||||||
|
'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'duration': 5092,
|
||||||
|
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
|
||||||
|
'timestamp': 1715019474,
|
||||||
|
'upload_date': '20240506',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'modified_date': str,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class TheChosenGroupIE(FrontroGroupBaseIE):
|
||||||
|
_CHANNEL_ID = '12884901895'
|
||||||
|
_VIDEO_EXTRACTOR = TheChosenIE
|
||||||
|
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'
|
||||||
|
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.thechosen.tv/group/309237658592',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '309237658592',
|
||||||
|
'title': 'Season 3',
|
||||||
|
'timestamp': 1746203969,
|
||||||
|
'upload_date': '20250502',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'modified_date': str,
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
}]
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
import re
|
import functools
|
||||||
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
InAdvancePagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@@ -10,15 +12,64 @@
|
|||||||
from ..utils.traversal import require, traverse_obj
|
from ..utils.traversal import require, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class MaveIE(InfoExtractor):
|
class MaveBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
|
_API_BASE_URL = 'https://api.mave.digital/v1/website'
|
||||||
|
_API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'
|
||||||
|
|
||||||
|
def _load_channel_meta(self, channel_id, display_id):
|
||||||
|
return traverse_obj(self._download_json(
|
||||||
|
f'{self._API_BASE_URL}/{channel_id}/', display_id,
|
||||||
|
note='Downloading channel metadata'), 'podcast')
|
||||||
|
|
||||||
|
def _load_episode_meta(self, channel_id, episode_code, display_id):
|
||||||
|
return self._download_json(
|
||||||
|
f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}',
|
||||||
|
display_id, note='Downloading episode metadata')
|
||||||
|
|
||||||
|
def _create_entry(self, channel_id, channel_meta, episode_meta):
|
||||||
|
episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))
|
||||||
|
return {
|
||||||
|
'display_id': f'{channel_id}-{episode_code}',
|
||||||
|
'extractor_key': MaveIE.ie_key(),
|
||||||
|
'extractor': MaveIE.IE_NAME,
|
||||||
|
'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
|
||||||
|
'channel_id': channel_id,
|
||||||
|
'channel_url': f'https://{channel_id}.mave.digital/',
|
||||||
|
'vcodec': 'none',
|
||||||
|
**traverse_obj(episode_meta, {
|
||||||
|
'id': ('id', {str}),
|
||||||
|
'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {clean_html}),
|
||||||
|
'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'season_number': ('season', {int_or_none}),
|
||||||
|
'episode_number': ('number', {int_or_none}),
|
||||||
|
'view_count': ('listenings', {int_or_none}),
|
||||||
|
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
||||||
|
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
||||||
|
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
||||||
|
'timestamp': ('publish_date', {parse_iso8601}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(channel_meta, {
|
||||||
|
'series_id': ('id', {str}),
|
||||||
|
'series': ('title', {str}),
|
||||||
|
'channel': ('title', {str}),
|
||||||
|
'uploader': ('author', {str}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MaveIE(MaveBaseIE):
|
||||||
|
IE_NAME = 'mave'
|
||||||
|
_VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://ochenlichnoe.mave.digital/ep-25',
|
'url': 'https://ochenlichnoe.mave.digital/ep-25',
|
||||||
'md5': 'aa3e513ef588b4366df1520657cbc10c',
|
'md5': 'aa3e513ef588b4366df1520657cbc10c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
|
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'display_id': 'ochenlichnoe-ep-25',
|
'display_id': 'ochenlichnoe-25',
|
||||||
'title': 'Между мной и миром: психология самооценки',
|
'title': 'Между мной и миром: психология самооценки',
|
||||||
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
|
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
|
||||||
'uploader': 'Самарский университет',
|
'uploader': 'Самарский университет',
|
||||||
@@ -45,7 +96,7 @@ class MaveIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
|
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'display_id': 'budem-ep-12',
|
'display_id': 'budem-12',
|
||||||
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
|
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
|
||||||
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
|
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
|
||||||
'uploader': 'Полина Цветкова+Евгения Акопова',
|
'uploader': 'Полина Цветкова+Евгения Акопова',
|
||||||
@@ -68,40 +119,72 @@ class MaveIE(InfoExtractor):
|
|||||||
'upload_date': '20241230',
|
'upload_date': '20241230',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
_API_BASE_URL = 'https://api.mave.digital/'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
|
channel_id, episode_code = self._match_valid_url(url).group(
|
||||||
display_id = f'{channel_id}-{slug}'
|
'channel_id', 'episode_code')
|
||||||
webpage = self._download_webpage(url, display_id)
|
display_id = f'{channel_id}-{episode_code}'
|
||||||
data = traverse_obj(
|
|
||||||
self._search_nuxt_json(webpage, display_id),
|
channel_meta = self._load_channel_meta(channel_id, display_id)
|
||||||
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
|
episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)
|
||||||
|
|
||||||
|
return self._create_entry(channel_id, channel_meta, episode_meta)
|
||||||
|
|
||||||
|
|
||||||
|
class MaveChannelIE(MaveBaseIE):
|
||||||
|
IE_NAME = 'mave:channel'
|
||||||
|
_VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://budem.mave.digital/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'budem',
|
||||||
|
'title': 'Все там будем',
|
||||||
|
'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 15,
|
||||||
|
}, {
|
||||||
|
'url': 'https://ochenlichnoe.mave.digital/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ochenlichnoe',
|
||||||
|
'title': 'Очень личное',
|
||||||
|
'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 20,
|
||||||
|
}, {
|
||||||
|
'url': 'https://geekcity.mave.digital/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'geekcity',
|
||||||
|
'title': 'Мужчины в трико',
|
||||||
|
'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 80,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 50
|
||||||
|
|
||||||
|
def _entries(self, channel_id, channel_meta, page_num):
|
||||||
|
page_data = self._download_json(
|
||||||
|
f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={
|
||||||
|
'view': 'all',
|
||||||
|
'page': page_num + 1,
|
||||||
|
'sort': 'newest',
|
||||||
|
'format': 'all',
|
||||||
|
}, note=f'Downloading page {page_num + 1}')
|
||||||
|
for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])):
|
||||||
|
yield self._create_entry(channel_id, channel_meta, ep)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
|
channel_meta = self._load_channel_meta(channel_id, channel_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'display_id': display_id,
|
'_type': 'playlist',
|
||||||
'channel_id': channel_id,
|
'id': channel_id,
|
||||||
'channel_url': f'https://{channel_id}.mave.digital/',
|
**traverse_obj(channel_meta, {
|
||||||
'vcodec': 'none',
|
|
||||||
'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
|
|
||||||
**traverse_obj(data, ('activeEpisodeData', {
|
|
||||||
'url': ('audio', {urljoin(self._API_BASE_URL)}),
|
|
||||||
'id': ('id', {str}),
|
|
||||||
'title': ('title', {str}),
|
'title': ('title', {str}),
|
||||||
'description': ('description', {clean_html}),
|
'description': ('description', {str}),
|
||||||
'duration': ('duration', {int_or_none}),
|
}),
|
||||||
'season_number': ('season', {int_or_none}),
|
'entries': InAdvancePagedList(
|
||||||
'episode_number': ('number', {int_or_none}),
|
functools.partial(self._entries, channel_id, channel_meta),
|
||||||
'view_count': ('listenings', {int_or_none}),
|
math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
|
||||||
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
|
|
||||||
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
|
|
||||||
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
|
|
||||||
'timestamp': ('publish_date', {parse_iso8601}),
|
|
||||||
})),
|
|
||||||
**traverse_obj(data, ('podcast', 'podcast', {
|
|
||||||
'series_id': ('id', {str}),
|
|
||||||
'series': ('title', {str}),
|
|
||||||
'channel': ('title', {str}),
|
|
||||||
'uploader': ('author', {str}),
|
|
||||||
})),
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3150,6 +3150,9 @@ def _extract_formats_and_subtitles(self, video_id, player_responses, player_url,
|
|||||||
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
||||||
'Use formats=duplicate extractor argument instead')
|
'Use formats=duplicate extractor argument instead')
|
||||||
|
|
||||||
|
def is_super_resolution(f_url):
|
||||||
|
return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...))
|
||||||
|
|
||||||
def solve_sig(s, spec):
|
def solve_sig(s, spec):
|
||||||
return ''.join(s[i] for i in spec)
|
return ''.join(s[i] for i in spec)
|
||||||
|
|
||||||
@@ -3202,7 +3205,7 @@ def get_language_code_and_preference(fmt_stream):
|
|||||||
def get_stream_id(fmt_stream):
|
def get_stream_id(fmt_stream):
|
||||||
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||||||
|
|
||||||
def process_format_stream(fmt_stream, proto, missing_pot):
|
def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False):
|
||||||
itag = str_or_none(fmt_stream.get('itag'))
|
itag = str_or_none(fmt_stream.get('itag'))
|
||||||
audio_track = fmt_stream.get('audioTrack') or {}
|
audio_track = fmt_stream.get('audioTrack') or {}
|
||||||
quality = fmt_stream.get('quality')
|
quality = fmt_stream.get('quality')
|
||||||
@@ -3253,10 +3256,13 @@ def process_format_stream(fmt_stream, proto, missing_pot):
|
|||||||
dct = {
|
dct = {
|
||||||
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
||||||
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||||||
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
'format_id': join_nonempty(itag, (
|
||||||
|
'drc' if fmt_stream.get('isDrc')
|
||||||
|
else 'sr' if super_resolution
|
||||||
|
else None)),
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
||||||
name, fmt_stream.get('isDrc') and 'DRC',
|
name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled',
|
||||||
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||||
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||||
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
||||||
@@ -3342,7 +3348,9 @@ def process_https_formats():
|
|||||||
self.report_warning(msg, video_id, only_once=True)
|
self.report_warning(msg, video_id, only_once=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
|
fmt = process_format_stream(
|
||||||
|
fmt_stream, proto, missing_pot=require_po_token and not po_token,
|
||||||
|
super_resolution=is_super_resolution(fmt_url))
|
||||||
if not fmt:
|
if not fmt:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
@@ -305,6 +305,8 @@ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
|
|||||||
status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
|
status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
|
||||||
|
|
||||||
def read(self, amt=None):
|
def read(self, amt=None):
|
||||||
|
if self.closed:
|
||||||
|
return b''
|
||||||
try:
|
try:
|
||||||
data = self.fp.read(amt)
|
data = self.fp.read(amt)
|
||||||
underlying = getattr(self.fp, 'fp', None)
|
underlying = getattr(self.fp, 'fp', None)
|
||||||
|
|||||||
Reference in New Issue
Block a user