0
0
Fork 0

[KUSI] Add new extractor

main
mutantmonkey 2016-02-15 17:30:53 -08:00
parent f66a3c7bc2
commit 199e724291
2 changed files with 62 additions and 0 deletions

View File

@@ -338,6 +338,7 @@ from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
KuwoAlbumIE,

View File

@@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote_plus
from ..utils import int_or_none
class KUSIIE(InfoExtractor):
    """Information extractor for videos on kusi.com.

    Two URL shapes are accepted:

    * story pages (``/story/...``): the page is downloaded and the clip id
      is scraped out of it;
    * direct clip links (``/video?clipId=<digits>``): the clip id is taken
      straight from the URL.

    In both cases the clip id is then used to fetch an XML description of
    the clip, from which the title, description, duration and the list of
    available formats are read.
    """

    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
    _TEST = {
        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
        'info_dict': {
            'id': '12203019',
            'ext': 'mp4',
            'title': 'Turko Files: Case Closed! & Put On Hold!',
            'duration': 231,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip referenced by *url*.

        The returned dict carries ``id``, ``title``, ``description``,
        ``duration`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)

        # Direct clip links already carry the id; story pages have to be
        # downloaded so the id can be scraped from the markup.
        video_id = mobj.group('clipId')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
            video_id = self._html_search_regex(
                r'"clipId", "(\d+)"', webpage, 'clipId')

        xml_url = (
            'http://www.kusi.com/build.asp?buildtype=buildfeaturexml'
            'request&featureType=Clip&featureid={0}&affiliateno=956&'
            'clientgroupid=1&rnd=562461'.format(video_id))
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        video_title = doc.find('HEADLINE').text

        # NOTE(review): the feed's DURATION looks like milliseconds (the test
        # clip reports 231000, which would be ~64 hours if read as seconds),
        # so it is scaled down to whole seconds here -- confirm against the
        # feed if a clip ever reports an implausible length.
        duration = int_or_none(
            doc.find('DURATION'), scale=1000, get_attr='text')

        # find() yields an Element (or None); the info dict needs its text.
        abstract = doc.find('ABSTRACT')
        description = abstract.text if abstract is not None else None

        media_ns = '{http://search.yahoo.com/mrss/}'
        media_group = doc.find(media_ns + 'group')
        if media_group is None:
            # No media group in the feed: leave the format list empty rather
            # than failing with an AttributeError on None.
            quality_options = []
        else:
            quality_options = media_group.findall(media_ns + 'content')

        # Only entries that declare a height are offered as formats.  The
        # height is converted to an int so that formats are ordered
        # numerically instead of by string comparison.
        formats = [{
            'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
            'height': int_or_none(quality.attrib['height']),
        } for quality in quality_options if 'height' in quality.attrib]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }