From: Ben Darnell
Date: Mon, 30 May 2011 04:57:35 +0000 (-0700)
Subject: Add an encoding argument to tornado.escape.url_unescape.
X-Git-Tag: v2.0.0~43
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a8603159f44e37c36b4a328fe6d2c765d3af171e;p=thirdparty%2Ftornado.git

Add an encoding argument to tornado.escape.url_unescape.
---

diff --git a/tornado/escape.py b/tornado/escape.py
index e828a6135..645790964 100644
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -18,6 +18,7 @@
 
 import htmlentitydefs
 import re
+import sys
 import xml.sax.saxutils
 import urllib
 
@@ -86,10 +87,34 @@ def url_escape(value):
     """Returns a valid URL-encoded version of the given value."""
     return urllib.quote_plus(utf8(value))
 
-
-def url_unescape(value):
-    """Decodes the given value from a URL."""
-    return _unicode(urllib.unquote_plus(value))
+# python 3 changed things around enough that we need two separate
+# implementations of url_unescape
+if sys.version_info[0] < 3:
+    def url_unescape(value, encoding='utf-8'):
+        """Decodes the given value from a URL.
+
+        The argument may be either a byte or unicode string.
+
+        If encoding is None, the result will be a byte string. Otherwise,
+        the result is a unicode string in the specified encoding.
+        """
+        if encoding is None:
+            return urllib.unquote_plus(utf8(value))
+        else:
+            return unicode(urllib.unquote_plus(utf8(value)), encoding)
+else:
+    def url_unescape(value, encoding='utf-8'):
+        """Decodes the given value from a URL.
+
+        The argument may be either a byte or unicode string.
+
+        If encoding is None, the result will be a byte string. Otherwise,
+        the result is a unicode string in the specified encoding.
+        """
+        if encoding is None:
+            return urllib.parse.unquote_to_bytes(value)
+        else:
+            return urllib.unquote_plus(native_str(value), encoding=encoding)
 
 
 _UTF8_TYPES = (bytes, type(None))
diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py
index c0fde83c7..15aeb64ea 100644
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -3,7 +3,7 @@
 import tornado.escape
 import unittest
 
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
 from tornado.util import b
 
 linkify_tests = [
@@ -140,3 +140,28 @@ class EscapeTestCase(unittest.TestCase):
         for unescaped, escaped in tests:
             self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
             self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
+
+    def test_url_escape(self):
+        tests = [
+            # byte strings are passed through as-is
+            (u'\u00e9'.encode('utf8'), '%C3%A9'),
+            (u'\u00e9'.encode('latin1'), '%E9'),
+
+            # unicode strings become utf8
+            (u'\u00e9', '%C3%A9'),
+            ]
+        for unescaped, escaped in tests:
+            self.assertEqual(url_escape(unescaped), escaped)
+
+    def test_url_unescape(self):
+        tests = [
+            ('%C3%A9', u'\u00e9', 'utf8'),
+            ('%C3%A9', u'\u00c3\u00a9', 'latin1'),
+            ('%C3%A9', utf8(u'\u00e9'), None),
+            ]
+        for escaped, unescaped, encoding in tests:
+            # input strings to url_unescape should only contain ascii
+            # characters, but make sure the function accepts both byte
+            # and unicode strings.
+            self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
+            self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)