2014-09-28 03:28:01 +10:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
from . . utils import (
get_meta_content ,
2014-10-27 11:33:49 +11:00
int_or_none ,
2014-09-28 03:28:01 +10:00
parse_iso8601 ,
)
class HeiseIE(InfoExtractor):
    """Information extractor for video article pages on heise.de.

    Matches ``heise.de/video/artikel/...<digits>.html`` URLs and builds the
    format list from the JW Player feed referenced by the article page.
    """

    # Verbose-mode pattern: the run of digits directly before ``.html`` is
    # captured as the video ``id``; a query string or fragment may follow.
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?heise\.de/video/artikel/
        .+?(?P<id>[0-9]+)\.html(?:$|[?#])
    '''
    _TEST = {
        'url': (
            'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
        ),
        'md5': 'ffed432483e922e88545ad9f2f15d30e',
        'info_dict': {
            'id': '2404147',
            'ext': 'mp4',
            'title': (
                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
            ),
            'format_id': 'mp4_720',
            'timestamp': 1411812600,
            'upload_date': '20140927',
            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
            # Raw string so that ``\.`` is not treated as a (deprecated)
            # unknown string escape; the runtime value is unchanged.
            'thumbnail': r're:https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract metadata and formats for a single heise.de video article.

        url -- page URL; expected to match ``_VALID_URL``.

        Returns a dict with the keys ``id``, ``thumbnail``, ``timestamp``,
        ``description``, ``title`` and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player <div> carries the two numeric IDs that address the feed.
        container_id = self._search_regex(
            r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
            webpage, 'container ID')
        sequenz_id = self._search_regex(
            r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
            webpage, 'sequenz ID')
        data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (
            container_id, sequenz_id)
        doc = self._download_xml(data_url, video_id)

        info = {
            'id': video_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
            'description': self._og_search_description(webpage),
        }

        # Prefer the "fulltitle" meta tag; fall back to the OpenGraph title
        # only when that tag is missing or empty.
        info['title'] = (
            get_meta_content('fulltitle', webpage)
            or self._og_search_title(webpage))

        formats = []
        for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
            attrs = source_node.attrib
            video_url = attrs.get('file')
            if not video_url:
                # A <source> element without a file URL cannot be downloaded.
                continue
            label = attrs.get('label')
            # The optional "<prefix>_" part is non-capturing so that the digits
            # are the only capture group and therefore unambiguously the value
            # handed to int_or_none.
            height = int_or_none(self._search_regex(
                r'^(?:.*?_)?([0-9]+)p$', label or '', 'height', default=None))
            formats.append({
                'url': video_url,
                'format_note': label,
                'height': height,
            })
        self._sort_formats(formats)
        info['formats'] = formats

        return info