[youtube] Use '_download_xml' for getting the available subtitles
parent
b8bc7a696b
commit
60e47a2699
|
@ -612,24 +612,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
subs_doc = self._download_xml(
|
||||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
||||||
return {}
|
return {}
|
||||||
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
|
||||||
|
|
||||||
sub_lang_list = {}
|
sub_lang_list = {}
|
||||||
for l in lang_list:
|
for track in subs_doc.findall('track'):
|
||||||
lang = l[1]
|
lang = track.attrib['lang_code']
|
||||||
if lang in sub_lang_list:
|
if lang in sub_lang_list:
|
||||||
continue
|
continue
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
||||||
'name': unescapeHTML(l[0]).encode('utf-8'),
|
'name': track.attrib['name'].encode('utf-8'),
|
||||||
})
|
})
|
||||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||||
sub_lang_list[lang] = url
|
sub_lang_list[lang] = url
|
||||||
|
|
Loading…
Reference in New Issue