0
0
Fork 0

[webofstories] Tolerate malformed og:title (Closes #8417)

main
Sergey M․ 2016-02-28 03:37:48 +06:00
parent 7a0e7779fe
commit 8870bb4653
1 changed file with 39 additions and 25 deletions

View File

@ -12,8 +12,7 @@ class WebOfStoriesIE(InfoExtractor):
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
_TESTS = [ _TESTS = [{
{
'url': 'http://www.webofstories.com/play/hans.bethe/71', 'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364', 'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': { 'info_dict': {
@ -24,8 +23,7 @@ class WebOfStoriesIE(InfoExtractor):
'description': 'Hans Bethe talks about calculating the temperature of the sun', 'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238, 'duration': 238,
} }
}, }, {
{
'url': 'http://www.webofstories.com/play/55908', 'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c', 'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': { 'info_dict': {
@ -35,15 +33,31 @@ class WebOfStoriesIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169, 'duration': 169,
}
}, },
] 'skip': 'notfound',
}, {
# malformed og:title meta
'url': 'http://www.webofstories.com/play/54215?o=MS',
'info_dict': {
'id': '54215',
'ext': 'mp4',
'title': '"A Leg to Stand On"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Oliver Sacks talks about the death and resurrection of a limb',
'duration': 97,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage) # Sometimes og:title meta is malformed
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)