[utils] Add a function to sanitize consecutive slashes in URLs
parent
5c7495a194
commit
55969016e9
|
@ -54,6 +54,7 @@ from youtube_dl.utils import (
|
|||
xpath_with_ns,
|
||||
render_table,
|
||||
match_str,
|
||||
url_sanitize_consecutive_slashes,
|
||||
)
|
||||
|
||||
|
||||
|
@ -501,6 +502,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
|||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
|
||||
def test_url_sanitize_consecutive_slashes(self):
    """Check that runs of slashes in a URL path collapse to a single slash.

    Each case is an (input URL, expected URL) pair. URLs that are
    already clean must come back unchanged, and slashes outside the
    path (scheme separator, query string) must not be touched.
    """
    cases = (
        # Duplicate slash in the middle of the path
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        # Duplicate slash directly after the hostname
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        # Path consisting only of slashes
        ('http://hostname//', 'http://hostname/'),
        # Already clean URLs are returned as-is
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/', 'http://hostname/'),
        # Only the path is rewritten; the query string is left alone
        ('http://hostname/foo//bar/filename.html?key=a//b',
         'http://hostname/foo/bar/filename.html?key=a//b'),
    )
    for url, expected in cases:
        self.assertEqual(url_sanitize_consecutive_slashes(url), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..compat import (
|
|||
compat_str,
|
||||
compat_urllib_request
|
||||
)
|
||||
from ..utils import url_sanitize_consecutive_slashes
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
|
@ -105,11 +106,8 @@ class SohuIE(InfoExtractor):
|
|||
|
||||
part_info = part_str.split('|')
|
||||
|
||||
# Sanitize URL to prevent download failure
|
||||
if part_info[0][-1] == '/' and su[i][0] == '/':
|
||||
su[i] = su[i][1:]
|
||||
|
||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
||||
video_url = url_sanitize_consecutive_slashes(
|
||||
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
|
|
@ -1789,3 +1789,18 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
|||
return None # No Proxy
|
||||
return compat_urllib_request.ProxyHandler.proxy_open(
|
||||
self, req, proxy, type)
|
||||
|
||||
|
||||
def url_sanitize_consecutive_slashes(url):
    """Sanitize URLs with consecutive slashes in their path

    The URL is split with urlparse, every run of two or more slashes in
    the path component is replaced by a single slash, and the pieces are
    put back together with urlunparse. The other components (scheme,
    network location, params, query, fragment) are not modified.

    For example, transform both
        http://hostname/foo//bar/filename.html
    and
        http://hostname//foo/bar/filename.html
    into
        http://hostname/foo/bar/filename.html

    url -- the URL string to sanitize

    Returns the re-assembled URL string.
    """
    parsed_url = compat_urlparse.urlparse(url)
    # Rewrite the path only, so the '//' that follows the scheme and any
    # slashes inside the query string or fragment are preserved.
    sanitized_path = re.sub(r'/{2,}', '/', parsed_url.path)
    return compat_urlparse.urlunparse(
        parsed_url._replace(path=sanitized_path))
|
||||
|
|
Loading…
Reference in New Issue