[YoutubeDL] Fix format selection with filters (Closes #10083)
parent
23495d6a39
commit
317f7ab634
|
@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||||
|
|
||||||
|
def test_audio_only_extractor_format_selection(self):
    """Check 'best'/'worst' on an extractor that only offers audio-only formats.

    When every format is incomplete (all audio-only or all video-only),
    'best' and 'worst' are expected to fall back to the corresponding
    best/worst audio-only or video-only format, as per
    https://github.com/rg3/youtube-dl/pull/5556.
    """
    audio_formats = [
        {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
        {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
    ]
    result = _make_result(audio_formats)

    # (format spec, format_id that is expected to be downloaded)
    expectations = (
        ('best', 'high'),
        ('worst', 'low'),
    )
    for spec, expected_id in expectations:
        ydl = YDL({'format': spec})
        # Hand over a copy so each run starts from the same result dict.
        ydl.process_ie_result(result.copy())
        picked = ydl.downloaded_info_dicts[0]
        self.assertEqual(picked['format_id'], expected_id)
|
||||||
|
|
||||||
|
def test_format_not_available(self):
    """Check that a filter rejecting the only complete format raises an error.

    The format set holds one complete (video+audio) format and one
    video-only format, so the extractor does not provide only incomplete
    (i.e. only video-only or only audio-only) formats.
    """
    complete = {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}
    video_only = {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}
    result = _make_result([complete, video_only])

    # The complete format does not pass the height filter, and the format
    # set is not made up of incomplete formats only, so selection must fail.
    filtering_ydl = YDL({'format': 'best[height>360]'})
    self.assertRaises(
        ExtractorError, filtering_ydl.process_ie_result, result.copy())
|
||||||
|
|
||||||
def test_invalid_format_specs(self):
|
def test_invalid_format_specs(self):
|
||||||
def assert_syntax_error(format_spec):
|
def assert_syntax_error(format_spec):
|
||||||
ydl = YDL({'format': format_spec})
|
ydl = YDL({'format': format_spec})
|
||||||
|
|
|
@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
|
@ -1051,9 +1052,9 @@ class YoutubeDL(object):
|
||||||
if isinstance(selector, list):
|
if isinstance(selector, list):
|
||||||
fs = [_build_selector_function(s) for s in selector]
|
fs = [_build_selector_function(s) for s in selector]
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
for f in fs:
|
for f in fs:
|
||||||
for format in f(formats):
|
for format in f(ctx):
|
||||||
yield format
|
yield format
|
||||||
return selector_function
|
return selector_function
|
||||||
elif selector.type == GROUP:
|
elif selector.type == GROUP:
|
||||||
|
@ -1061,17 +1062,17 @@ class YoutubeDL(object):
|
||||||
elif selector.type == PICKFIRST:
|
elif selector.type == PICKFIRST:
|
||||||
fs = [_build_selector_function(s) for s in selector.selector]
|
fs = [_build_selector_function(s) for s in selector.selector]
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
for f in fs:
|
for f in fs:
|
||||||
picked_formats = list(f(formats))
|
picked_formats = list(f(ctx))
|
||||||
if picked_formats:
|
if picked_formats:
|
||||||
return picked_formats
|
return picked_formats
|
||||||
return []
|
return []
|
||||||
elif selector.type == SINGLE:
|
elif selector.type == SINGLE:
|
||||||
format_spec = selector.selector
|
format_spec = selector.selector
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
formats = list(formats)
|
formats = list(ctx['formats'])
|
||||||
if not formats:
|
if not formats:
|
||||||
return
|
return
|
||||||
if format_spec == 'all':
|
if format_spec == 'all':
|
||||||
|
@ -1084,9 +1085,10 @@ class YoutubeDL(object):
|
||||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||||
if audiovideo_formats:
|
if audiovideo_formats:
|
||||||
yield audiovideo_formats[format_idx]
|
yield audiovideo_formats[format_idx]
|
||||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
# for extractors with incomplete formats (audio only (soundcloud)
|
||||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
# or video only (imgur)) we will fall back to best/worst
|
||||||
all(f.get('vcodec') != 'none' for f in formats)):
|
# {video,audio}-only format
|
||||||
|
elif ctx['incomplete_formats']:
|
||||||
yield formats[format_idx]
|
yield formats[format_idx]
|
||||||
elif format_spec == 'bestaudio':
|
elif format_spec == 'bestaudio':
|
||||||
audio_formats = [
|
audio_formats = [
|
||||||
|
@ -1160,17 +1162,18 @@ class YoutubeDL(object):
|
||||||
}
|
}
|
||||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||||
|
|
||||||
def selector_function(formats):
|
def selector_function(ctx):
|
||||||
formats = list(formats)
|
for pair in itertools.product(
|
||||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||||
yield _merge(pair)
|
yield _merge(pair)
|
||||||
|
|
||||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||||
|
|
||||||
def final_selector(formats):
|
def final_selector(ctx):
|
||||||
|
ctx_copy = copy.deepcopy(ctx)
|
||||||
for _filter in filters:
|
for _filter in filters:
|
||||||
formats = list(filter(_filter, formats))
|
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||||
return selector_function(formats)
|
return selector_function(ctx_copy)
|
||||||
return final_selector
|
return final_selector
|
||||||
|
|
||||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||||
|
@ -1377,7 +1380,35 @@ class YoutubeDL(object):
|
||||||
req_format_list.append('best')
|
req_format_list.append('best')
|
||||||
req_format = '/'.join(req_format_list)
|
req_format = '/'.join(req_format_list)
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
formats_to_download = list(format_selector(formats))
|
|
||||||
|
# During format selection we may need access to the original
|
||||||
|
# format set in order to calculate some metrics or do some processing.
|
||||||
|
# For now we need to be able to guess whether original formats provided
|
||||||
|
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||||
|
# video-only or audio-only formats) for proper formats selection for
|
||||||
|
# extractors with such incomplete formats (see
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||||
|
# Since formats may be filtered during format selection and may not match
|
||||||
|
# the original formats the results may be incorrect. Thus original formats
|
||||||
|
# or pre-calculated metrics should be passed to format selection routines
|
||||||
|
# as well.
|
||||||
|
# We will pass a context object containing all necessary additional data
|
||||||
|
# instead of just formats.
|
||||||
|
# This fixes incorrect format selection issue (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||||
|
# The format set is treated as incomplete only when it is uniform: either
# every format is video-only, or every format is audio-only. Each case
# needs its own all(); a single all() over "video-only or audio-only"
# would also be true for a set that mixes the two kinds.
# Note that an empty format list yields True, since all() of an empty
# iterable is True.
incomplete_formats = (
    # All formats are video-only or
    all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
    # all formats are audio-only
    all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||||
|
|
||||||
|
ctx = {
|
||||||
|
'formats': formats,
|
||||||
|
'incomplete_formats': incomplete_formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
formats_to_download = list(format_selector(ctx))
|
||||||
if not formats_to_download:
|
if not formats_to_download:
|
||||||
raise ExtractorError('requested format not available',
|
raise ExtractorError('requested format not available',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
Loading…
Reference in New Issue