Fix detection of the webpage charset when it is declared with single quotes (') instead of double quotes (")
For example: "<meta charset='utf-8'/>"
main
parent
2891932bf0
commit
0d75ae2ce3
|
@@ -150,7 +150,7 @@ class InfoExtractor(object):
|
||||||
if m:
|
if m:
|
||||||
encoding = m.group(1)
|
encoding = m.group(1)
|
||||||
else:
|
else:
|
||||||
m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
|
m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
|
||||||
webpage_bytes[:1024])
|
webpage_bytes[:1024])
|
||||||
if m:
|
if m:
|
||||||
encoding = m.group(1).decode('ascii')
|
encoding = m.group(1).decode('ascii')
|
||||||
|
|
Loading…
Reference in New Issue