[f4m] Prefer baseURL for relative URLs (closes #14660)
parent
cd670befc4
commit
48107c198b
|
@ -243,8 +243,17 @@ def remove_encrypted_media(media):
|
||||||
media))
|
media))
|
||||||
|
|
||||||
|
|
||||||
def _add_ns(prop):
|
def _add_ns(prop, ver=1):
|
||||||
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
|
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_url(manifest):
|
||||||
|
base_url = xpath_text(
|
||||||
|
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
|
||||||
|
'base URL', default=None)
|
||||||
|
if base_url:
|
||||||
|
base_url = base_url.strip()
|
||||||
|
return base_url
|
||||||
|
|
||||||
|
|
||||||
class F4mFD(FragmentFD):
|
class F4mFD(FragmentFD):
|
||||||
|
@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
|
||||||
rate, media = list(filter(
|
rate, media = list(filter(
|
||||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||||
|
|
||||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
|
||||||
|
man_base_url = get_base_url(doc) or man_url
|
||||||
|
|
||||||
|
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
|
||||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||||
# From Adobe F4M 3.0 spec:
|
boot_info, bootstrap_url = self._parse_bootstrap_node(
|
||||||
# The <baseURL> element SHALL be the base URL for all relative
|
bootstrap_node, man_base_url)
|
||||||
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
|
|
||||||
# URLs should be relative to the location of the containing document.
|
|
||||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
|
|
||||||
live = boot_info['live']
|
live = boot_info['live']
|
||||||
metadata_node = media.find(_add_ns('metadata'))
|
metadata_node = media.find(_add_ns('metadata'))
|
||||||
if metadata_node is not None:
|
if metadata_node is not None:
|
||||||
|
|
|
@ -29,7 +29,10 @@ from ..compat import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
)
|
)
|
||||||
from ..downloader.f4m import remove_encrypted_media
|
from ..downloader.f4m import (
|
||||||
|
get_base_url,
|
||||||
|
remove_encrypted_media,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
|
@ -1239,11 +1242,8 @@ class InfoExtractor(object):
|
||||||
media_nodes = remove_encrypted_media(media_nodes)
|
media_nodes = remove_encrypted_media(media_nodes)
|
||||||
if not media_nodes:
|
if not media_nodes:
|
||||||
return formats
|
return formats
|
||||||
base_url = xpath_text(
|
|
||||||
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
|
manifest_base_url = get_base_url(manifest)
|
||||||
'base URL', default=None)
|
|
||||||
if base_url:
|
|
||||||
base_url = base_url.strip()
|
|
||||||
|
|
||||||
bootstrap_info = xpath_element(
|
bootstrap_info = xpath_element(
|
||||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||||
|
@ -1275,7 +1275,7 @@ class InfoExtractor(object):
|
||||||
continue
|
continue
|
||||||
manifest_url = (
|
manifest_url = (
|
||||||
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
||||||
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
|
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
|
||||||
# If media_url is itself a f4m manifest do the recursive extraction
|
# If media_url is itself a f4m manifest do the recursive extraction
|
||||||
# since bitrates in parent manifest (this one) and media_url manifest
|
# since bitrates in parent manifest (this one) and media_url manifest
|
||||||
# may differ leading to inability to resolve the format by requested
|
# may differ leading to inability to resolve the format by requested
|
||||||
|
|
Loading…
Reference in New Issue