From: Ben Darnell <ben@bendarnell.com>
Date: Mon, 30 May 2011 04:57:35 +0000 (-0700)
Subject: Add an encoding argument to tornado.escape.url_unescape.
X-Git-Tag: v2.0.0~43
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a8603159f44e37c36b4a328fe6d2c765d3af171e;p=thirdparty%2Ftornado.git

Add an encoding argument to tornado.escape.url_unescape.
---

diff --git a/tornado/escape.py b/tornado/escape.py
index e828a6135..645790964 100644
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -18,6 +18,7 @@
 
 import htmlentitydefs
 import re
+import sys
 import xml.sax.saxutils
 import urllib
 
@@ -86,10 +87,34 @@ def url_escape(value):
     """Returns a valid URL-encoded version of the given value."""
     return urllib.quote_plus(utf8(value))
 
-
-def url_unescape(value):
-    """Decodes the given value from a URL."""
-    return _unicode(urllib.unquote_plus(value))
+# Python 2 and Python 3 expose URL unquoting through different APIs, so
+# url_unescape is defined separately for each major version.
+if sys.version_info[0] < 3:
+    def url_unescape(value, encoding='utf-8'):
+        """Decodes the given value from a URL.
+
+        The argument may be either a byte or unicode string; '+' decodes
+        to a space.  If encoding is None, the result will be a byte
+        string.  Otherwise, the result is a unicode string obtained by
+        decoding the unescaped bytes with the specified encoding.
+        """
+        unquoted = urllib.unquote_plus(utf8(value))
+        if encoding is None:
+            return unquoted
+        return unicode(unquoted, encoding)
+else:
+    def url_unescape(value, encoding='utf-8'):
+        """Decodes the given value from a URL.
+
+        The argument may be either a byte or unicode string; '+' decodes
+        to a space.  If encoding is None, the result will be a byte
+        string.  Otherwise, the result is a unicode string obtained by
+        decoding the unescaped bytes with the specified encoding.
+        """
+        value = native_str(value)
+        if encoding is None:
+            return urllib.parse.unquote_to_bytes(value.replace('+', ' '))
+        return urllib.parse.unquote_plus(value, encoding=encoding)
 
 
 _UTF8_TYPES = (bytes, type(None))
diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py
index c0fde83c7..15aeb64ea 100644
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -3,7 +3,7 @@
 import tornado.escape
 import unittest
 
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
 from tornado.util import b
 
 linkify_tests = [
@@ -140,3 +140,28 @@ class EscapeTestCase(unittest.TestCase):
         for unescaped, escaped in tests:
             self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
             self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
+
+    def test_url_escape(self):
+        # Each case pairs an input value with its expected escaped form.
+        cases = (
+            # byte strings are escaped byte-for-byte, whatever their encoding
+            (u'\u00e9'.encode('utf8'), '%C3%A9'),
+            (u'\u00e9'.encode('latin1'), '%E9'),
+            # unicode strings are converted to utf8 before escaping
+            (u'\u00e9', '%C3%A9'),
+        )
+        for raw, expected in cases:
+            self.assertEqual(url_escape(raw), expected)
+
+    def test_url_unescape(self):
+        # (escaped input, expected result, encoding argument)
+        cases = (
+            ('%C3%A9', u'\u00e9', 'utf8'),
+            ('%C3%A9', u'\u00c3\u00a9', 'latin1'),
+            ('%C3%A9', utf8(u'\u00e9'), None),
+        )
+        # Inputs to url_unescape should only contain ascii characters,
+        # but the function must accept both byte and unicode strings.
+        for escaped, expected, encoding in cases:
+            for convert in (to_unicode, utf8):
+                self.assertEqual(url_unescape(convert(escaped), encoding), expected)