[mtvservices:embedded] Use another endpoint to get feed URL
Closes #10363. In the original mtvservices:embedded test case, config.xml is still used to get the feed URL. Some other examples, including test_Generic_40 (http://www.vulture.com/2016/06/new-key-peele-sketches-released.html) and the video mentioned in #10363, use another endpoint to get the feed URL. The 'index.html' approach works for the original test case too, so I didn't keep the old approach.
main
parent
97653f81b2
commit
0c75abbb7b
|
@ -1,3 +1,9 @@
|
||||||
|
version <unreleased>
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
|
||||||
|
|
||||||
|
|
||||||
version 2016.08.24.1
|
version 2016.08.24.1
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
|
|
@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
|
||||||
|
|
||||||
|
|
||||||
class BetIE(MTVServicesInfoExtractor):
|
class BetIE(MTVServicesInfoExtractor):
|
||||||
|
@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
|
||||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
return compat_urllib_parse_urlencode({
|
return {
|
||||||
'uuid': uri,
|
'uuid': uri,
|
||||||
})
|
}
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||||
|
|
|
@ -4,7 +4,6 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_xpath,
|
compat_xpath,
|
||||||
)
|
)
|
||||||
|
@ -14,12 +13,13 @@ from ..utils import (
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
RegexNotFoundError,
|
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
def _id_from_uri(uri):
|
def _id_from_uri(uri):
|
||||||
return uri.split(':')[-1]
|
return uri.split(':')[-1]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _remove_template_parameter(url):
|
||||||
|
# Remove the templates, like &device={device}
|
||||||
|
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
||||||
|
|
||||||
# This was originally implemented for ComedyCentral, but it also works here
|
# This was originally implemented for ComedyCentral, but it also works here
|
||||||
@classmethod
|
@classmethod
|
||||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
|
@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
|
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
|
||||||
mediagen_url = content_el.attrib['url']
|
mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
|
||||||
# Remove the templates, like &device={device}
|
|
||||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
|
||||||
if 'acceptMethods' not in mediagen_url:
|
if 'acceptMethods' not in mediagen_url:
|
||||||
mediagen_url += '&' if '?' in mediagen_url else '?'
|
mediagen_url += '&' if '?' in mediagen_url else '?'
|
||||||
mediagen_url += 'acceptMethods=fms'
|
mediagen_url += 'acceptMethods=fms'
|
||||||
|
@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
data = {'uri': uri}
|
data = {'uri': uri}
|
||||||
if self._LANG:
|
if self._LANG:
|
||||||
data['lang'] = self._LANG
|
data['lang'] = self._LANG
|
||||||
return compat_urllib_parse_urlencode(data)
|
return data
|
||||||
|
|
||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
feed_url = self._get_feed_url(uri)
|
feed_url = self._get_feed_url(uri)
|
||||||
info_url = feed_url + '?' + self._get_feed_query(uri)
|
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
||||||
return self._get_videos_info_from_url(info_url, video_id)
|
return self._get_videos_info_from_url(info_url, video_id)
|
||||||
|
|
||||||
def _get_videos_info_from_url(self, url, video_id):
|
def _get_videos_info_from_url(self, url, video_id):
|
||||||
|
@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||||
|
|
||||||
def _get_feed_url(self, uri):
|
def _get_feed_url(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
site_id = uri.replace(video_id, '')
|
config = self._download_json(
|
||||||
config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/'
|
'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
|
||||||
'context52/config.xml'.format(site_id))
|
return self._remove_template_parameter(config['feedWithQueryParams'])
|
||||||
config_doc = self._download_xml(config_url, video_id)
|
|
||||||
feed_node = config_doc.find('.//feed')
|
|
||||||
feed_url = feed_node.text.strip().split('?')[0]
|
|
||||||
return feed_url
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
|
||||||
from ..utils import update_url_query
|
from ..utils import update_url_query
|
||||||
|
|
||||||
|
|
||||||
|
@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
return compat_urllib_parse_urlencode({
|
return {
|
||||||
'feed': 'nick_arc_player_prime',
|
'feed': 'nick_arc_player_prime',
|
||||||
'mgid': uri,
|
'mgid': uri,
|
||||||
})
|
}
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
|
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
|
||||||
|
|
Loading…
Reference in New Issue