2014-11-25 06:02:00 +11:00
# coding: utf-8
from __future__ import unicode_literals
2016-02-01 08:00:09 +11:00
import re
2014-11-25 06:02:00 +11:00
from . common import InfoExtractor
2016-02-01 08:00:09 +11:00
from . . utils import (
js_to_json ,
determine_ext ,
)
2014-11-25 06:02:00 +11:00
2014-11-25 08:47:23 +11:00
2014-11-25 06:02:00 +11:00
class BpbIE(InfoExtractor):
    """Extractor for videos in the bpb.de "Mediathek".

    The media page embeds one JavaScript object literal per available
    rendition (``{src: '...', quality: '...'}``); each of them becomes one
    entry of the returned ``formats`` list.
    """

    IE_DESC = 'Bundeszentrale für politische Bildung'
    # Accept both schemes and an optional "www." prefix; the numeric path
    # component is the video id.
    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'

    _TEST = {
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
        # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
        'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
        }
    }

    def _real_extract(self, url):
        """Download the media page at *url* and build its info dict.

        Returns a dict with the keys ``id``, ``formats``, ``title`` and
        ``description``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', webpage, 'title')

        # Each match is a JavaScript object literal describing one rendition
        # hosted on film.bpb.de.
        video_info_dicts = re.findall(
            r"(\{\s*src:\s*'https?://film\.bpb\.de/[^}]+\})", webpage)

        formats = []
        for video_info in video_info_dicts:
            # The literal is JavaScript, not JSON (unquoted keys, single
            # quotes), hence the js_to_json transformation.
            video_info = self._parse_json(
                video_info, video_id, transform_source=js_to_json)
            video_url = video_info.get('src')
            if not video_url:
                # Without a source URL the entry is unusable; skip it rather
                # than failing the whole extraction.
                continue
            quality = video_info.get('quality')
            ext = determine_ext(video_url)
            formats.append({
                'url': video_url,
                # Rank the "high" rendition above every other one.
                'preference': 10 if quality == 'high' else 0,
                'format_note': quality,
                'format_id': '%s-%s' % (quality, ext) if quality else ext,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
        }