2015-02-19 11:43:20 +11:00
from __future__ import unicode_literals
import re
from . common import InfoExtractor
2015-05-31 08:05:26 +10:00
from . . compat import compat_urlparse
2015-02-19 11:43:20 +11:00
from . . utils import (
int_or_none ,
js_to_json ,
mimetype2ext ,
2015-02-19 13:12:48 +11:00
ExtractorError ,
2015-02-19 11:43:20 +11:00
)
2015-02-19 15:53:09 +11:00
2015-02-19 11:43:20 +11:00
class ImgurIE(InfoExtractor):
    """Information extractor for single-item imgur.com pages.

    Handles both ``imgur.com/<id>`` and ``i.imgur.com/<id>`` URLs. The
    page is expected to embed a ``video-elements`` block listing
    ``<source>`` tags; a ``videoItem`` JavaScript object, when present,
    supplies an additional low-preference GIF format.
    """

    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\\. Explore, share, and discuss the best visual stories the Internet has to offer\\.$',
        },
    }, {
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\\. Explore, share, and discuss the best visual stories the Internet has to offer\\.$',
        },
    }]

    def _real_extract(self, url):
        """Extract the formats and metadata for the item behind *url*.

        Returns a dict with the keys ``id``, ``formats``, ``description``
        and ``title``.

        Raises ExtractorError (with ``expected=True``) when the page has
        no ``video-elements`` block.
        """
        video_id = self._match_id(url)
        # Always fetch the page addressed by the bare id, resolved against
        # the incoming URL, regardless of any suffix the URL carried.
        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, video_id), video_id)

        # Dimensions are optional: a failed search yields None for both the
        # regex result and the converted value.
        width = int_or_none(self._search_regex(
            r'<param name="width" value="([0-9]+)"',
            webpage, 'width', fatal=False))
        height = int_or_none(self._search_regex(
            r'<param name="height" value="([0-9]+)"',
            webpage, 'height', fatal=False))

        video_elements = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)
        if not video_elements:
            raise ExtractorError(
                'No sources found for video %s. Maybe an image?' % video_id,
                expected=True)

        formats = []
        for m in re.finditer(
                r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"',
                video_elements):
            mime_type = m.group('type')
            formats.append({
                # The MIME subtype (the part after the slash) names the format.
                'format_id': mime_type.partition('/')[2],
                'url': self._proto_relative_url(m.group('src')),
                'ext': mimetype2ext(mime_type),
                'acodec': 'none',
                'width': width,
                'height': height,
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            })

        # The GIF rendition is a best-effort extra: the search is non-fatal,
        # so a page without the videoItem object still yields its video formats.
        gif_json = self._search_regex(
            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
            webpage, 'GIF code', fatal=False)
        if gif_json:
            gifd = self._parse_json(
                gif_json, video_id, transform_source=js_to_json)
            # Look the URL up defensively: a videoItem object without
            # 'gifUrl' must not abort extraction with a bare KeyError when
            # the video formats above are perfectly usable.
            gif_url = gifd.get('gifUrl')
            if gif_url:
                formats.append({
                    'format_id': 'gif',
                    'preference': -10,
                    'width': width,
                    'height': height,
                    'ext': 'gif',
                    'acodec': 'none',
                    'vcodec': 'gif',
                    'container': 'gif',
                    'url': self._proto_relative_url(gif_url),
                    'filesize': gifd.get('size'),
                    'http_headers': {
                        'User-Agent': 'youtube-dl (like wget)',
                    },
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'title': self._og_search_title(webpage),
        }