0
0
Fork 0

Py3 compat for unichr and htmlentitydefs

main
Philipp Hagemeister 2012-11-28 00:02:55 +01:00
parent da779b4924
commit 3e669f369f
1 changed file with 26 additions and 17 deletions

View File

@ -2,7 +2,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import gzip import gzip
import htmlentitydefs
import HTMLParser import HTMLParser
import locale import locale
import os import os
@ -17,19 +16,6 @@ try:
except ImportError: except ImportError:
import StringIO import StringIO
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us,en;q=0.5',
}
try:
compat_str = unicode # Python 2
except NameError:
compat_str = str
try: try:
import urllib.request as compat_urllib_request import urllib.request as compat_urllib_request
except ImportError: # Python 2 except ImportError: # Python 2
@ -50,6 +36,29 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
import cookielib as compat_cookiejar import cookielib as compat_cookiejar
# Expose the HTML entity tables under one name regardless of interpreter
# version. A missing module raises ImportError (not NameError), so that is
# the exception that must trigger the fallback import.
try:
    import html.entities as compat_html_entities
except ImportError:  # Python 2
    import htmlentitydefs as compat_html_entities
# Text string type: use the `unicode` builtin when it exists, otherwise
# fall back to `str`.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str
# Code point -> one-character text string: use the `unichr` builtin when it
# exists, otherwise fall back to `chr`.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
# Module-level table of HTTP request headers (header name -> value) with a
# Firefox-style User-Agent and matching Accept-* fields.
std_headers = {
    # Client identification string.
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    # Content negotiation: character sets, media types, encodings, languages.
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding(): def preferredencoding():
"""Get preferred encoding. """Get preferred encoding.
@ -74,8 +83,8 @@ def htmlentity_transform(matchobj):
entity = matchobj.group(1) entity = matchobj.group(1)
# Known non-numeric HTML entity # Known non-numeric HTML entity
if entity in htmlentitydefs.name2codepoint: if entity in compat_html_entities.name2codepoint:
return unichr(htmlentitydefs.name2codepoint[entity]) return compat_chr(compat_html_entities.name2codepoint[entity])
mobj = re.match(u'(?u)#(x?\\d+)', entity) mobj = re.match(u'(?u)#(x?\\d+)', entity)
if mobj is not None: if mobj is not None:
@ -85,7 +94,7 @@ def htmlentity_transform(matchobj):
numstr = u'0%s' % numstr numstr = u'0%s' % numstr
else: else:
base = 10 base = 10
return unichr(int(numstr, base)) return compat_chr(int(numstr, base))
# Unknown entity in name, return its literal representation # Unknown entity in name, return its literal representation
return (u'&%s;' % entity) return (u'&%s;' % entity)