import htmlentitydefs
import re
+import sys
import xml.sax.saxutils
import urllib
"""Returns a valid URL-encoded version of the given value."""
return urllib.quote_plus(utf8(value))
-
-def url_unescape(value):
- """Decodes the given value from a URL."""
- return _unicode(urllib.unquote_plus(value))
+# python 3 changed things around enough that we need two separate
+# implementations of url_unescape
+if sys.version_info[0] < 3:
+ def url_unescape(value, encoding='utf-8'):
+ """Decodes the given value from a URL.
+
+ The argument may be either a byte or unicode string.
+
+ If encoding is None, the result will be a byte string. Otherwise,
+ the result is a unicode string in the specified encoding.
+ """
+ if encoding is None:
+ return urllib.unquote_plus(utf8(value))
+ else:
+ return unicode(urllib.unquote_plus(utf8(value)), encoding)
+else:
+ def url_unescape(value, encoding='utf-8'):
+ """Decodes the given value from a URL.
+
+ The argument may be either a byte or unicode string.
+
+ If encoding is None, the result will be a byte string. Otherwise,
+ the result is a unicode string in the specified encoding.
+ """
+ if encoding is None:
+ return urllib.parse.unquote_to_bytes(value)
+ else:
+ return urllib.unquote_plus(native_str(value), encoding=encoding)
# Tuple of the bytes type and NoneType.
# NOTE(review): presumably the types utf8() returns unchanged -- confirm
# against the utf8() definition, which is not visible in this excerpt.
_UTF8_TYPES = (bytes, type(None))
import tornado.escape
import unittest
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
from tornado.util import b
linkify_tests = [
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
+
+ def test_url_escape(self):
+ tests = [
+ # byte strings are passed through as-is
+ (u'\u00e9'.encode('utf8'), '%C3%A9'),
+ (u'\u00e9'.encode('latin1'), '%E9'),
+
+ # unicode strings become utf8
+ (u'\u00e9', '%C3%A9'),
+ ]
+ for unescaped, escaped in tests:
+ self.assertEqual(url_escape(unescaped), escaped)
+
+ def test_url_unescape(self):
+ tests = [
+ ('%C3%A9', u'\u00e9', 'utf8'),
+ ('%C3%A9', u'\u00c3\u00a9', 'latin1'),
+ ('%C3%A9', utf8(u'\u00e9'), None),
+ ]
+ for escaped, unescaped, encoding in tests:
+ # input strings to url_unescape should only contain ascii
+ # characters, but make sure the function accepts both byte
+ # and unicode strings.
+ self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
+ self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)