[extractor/generic] Extract f4m formats and refactor common info
parent 0fdbb3322b
commit f930e0c76e
|
@ -1242,28 +1242,34 @@ class GenericIE(InfoExtractor):
|
||||||
full_response = self._request_webpage(request, video_id)
|
full_response = self._request_webpage(request, video_id)
|
||||||
head_response = full_response
|
head_response = full_response
|
||||||
|
|
||||||
|
info_dict = {
|
||||||
|
'id': video_id,
|
||||||
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
|
}
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = head_response.headers.get('Content-Type', '')
|
content_type = head_response.headers.get('Content-Type', '')
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||||
if m:
|
if m:
|
||||||
upload_date = unified_strdate(
|
upload_date = unified_strdate(
|
||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
formats = []
|
format_id = m.group('format_id')
|
||||||
if m.group('format_id').endswith('mpegurl'):
|
if format_id.endswith('mpegurl'):
|
||||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
|
elif format_id == 'f4m':
|
||||||
|
formats = self._extract_f4m_formats(url, video_id)
|
||||||
else:
|
else:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': m.group('format_id'),
|
'format_id': m.group('format_id'),
|
||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
}]
|
}]
|
||||||
return {
|
info_dict.update({
|
||||||
'id': video_id,
|
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
})
|
||||||
|
return info_dict
|
||||||
|
|
||||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
force = self._downloader.params.get('force_generic_extractor', False)
|
force = self._downloader.params.get('force_generic_extractor', False)
|
||||||
|
@ -1291,13 +1297,12 @@ class GenericIE(InfoExtractor):
|
||||||
'URL could be a direct video link, returning it as such.')
|
'URL could be a direct video link, returning it as such.')
|
||||||
upload_date = unified_strdate(
|
upload_date = unified_strdate(
|
||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
return {
|
info_dict.update({
|
||||||
'id': video_id,
|
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'url': url,
|
'url': url,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
})
|
||||||
|
return info_dict
|
||||||
|
|
||||||
webpage = self._webpage_read_content(
|
webpage = self._webpage_read_content(
|
||||||
full_response, url, video_id, prefix=first_bytes)
|
full_response, url, video_id, prefix=first_bytes)
|
||||||
|
@ -1314,12 +1319,12 @@ class GenericIE(InfoExtractor):
|
||||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
return {
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
'id': video_id,
|
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
return info_dict
|
||||||
'formats': self._parse_mpd_formats(
|
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||||
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
|
||||||
}
|
return info_dict
|
||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue