def _convert_entity(m):
if m.group(1) == "#":
try:
- return unichr(int(m.group(2)))
+ if m.group(2)[:1].lower() == 'x':
+ return unichr(int(m.group(2)[1:], 16))
+ else:
+ return unichr(int(m.group(2)))
except ValueError:
return "&#%s;" % m.group(2)
try:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
+ def test_xhtml_unescape_numeric(self):
+     """Check xhtml_unescape on numeric character references.
+
+     Covers the decimal form (``&#32;``) and the hexadecimal form
+     (``&#x20;``), with the hex marker written in either case, and
+     verifies that malformed references come back unchanged.
+     """
+     tests = [
+         ('foo&#32;bar', 'foo bar'),  # decimal reference
+         ('foo&#x20;bar', 'foo bar'),  # hex reference, lowercase marker
+         ('foo&#X20;bar', 'foo bar'),  # hex reference, uppercase marker
+         ('foo&#xabc;bar', u('foo\u0abcbar')),  # non-ASCII code point
+         ('foo&#xyz;bar', 'foo&#xyz;bar'),  # invalid encoding
+         ('foo&#;bar', 'foo&#;bar'),  # invalid encoding
+         ('foo&#x;bar', 'foo&#x;bar'),  # invalid encoding
+     ]
+     for escaped, unescaped in tests:
+         self.assertEqual(unescaped, xhtml_unescape(escaped))
+
def test_url_escape_unicode(self):
tests = [
# byte strings are passed through as-is