[extractor/common] Improved support for HTML5 subtitles
Ref: #10625. In a strict sense, <track>s with kind=captions are not subtitles [1]. openload misuses this attribute, and I guess there will be more examples, so I add it to common.py. Also allow extracting information for subtitles-only <video> or <audio> tags, which is the case for openload. [1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind
main
parent
e6332059ac
commit
5968d7d2fe
|
@ -1,3 +1,9 @@
|
||||||
|
version <unreleased>
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Improved support for HTML5 subtitles
|
||||||
|
|
||||||
|
|
||||||
version 2016.09.24
|
version 2016.09.24
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
|
|
@ -1828,7 +1828,7 @@ class InfoExtractor(object):
|
||||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||||
track_attributes = extract_attributes(track_tag)
|
track_attributes = extract_attributes(track_tag)
|
||||||
kind = track_attributes.get('kind')
|
kind = track_attributes.get('kind')
|
||||||
if not kind or kind == 'subtitles':
|
if not kind or kind in ('subtitles', 'captions'):
|
||||||
src = track_attributes.get('src')
|
src = track_attributes.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
|
@ -1836,7 +1836,7 @@ class InfoExtractor(object):
|
||||||
media_info['subtitles'].setdefault(lang, []).append({
|
media_info['subtitles'].setdefault(lang, []).append({
|
||||||
'url': absolute_url(src),
|
'url': absolute_url(src),
|
||||||
})
|
})
|
||||||
if media_info['formats']:
|
if media_info['formats'] or media_info['subtitles']:
|
||||||
entries.append(media_info)
|
entries.append(media_info)
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue