Add a 'plus' argument to url_{un,}escape, defaulting to True.

author Ben Darnell <ben@bendarnell.com>

Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)

committer Ben Darnell <ben@bendarnell.com>

Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)
author Ben Darnell <ben@bendarnell.com>
Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)
committer Ben Darnell <ben@bendarnell.com>
Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)
diff --git a/tornado/escape.py b/tornado/escape.py

index 546bd2ea863fa8f47ecc6ac87a0cf68fd46299fb..688f7f08787ac4dee00c6a09cec7b24bdbd5be84 100644 (file)
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -88,41 +88,67 @@ def squeeze(value):
      return re.sub(r"[\x00-\x20]+", " ", value).strip()
  
  
-def url_escape(value):
-    """Returns a URL-encoded version of the given value."""
-    return urllib_parse.quote_plus(utf8(value))
+def url_escape(value, plus=True):
+    """Returns a URL-encoded version of the given value.
+
+    If ``plus`` is true (the default), spaces will be represented
+    as "+" instead of "%20".  This is appropriate for query strings
+    but not for the path component of a URL.  Note that this default
+    is the reverse of Python's urllib module.
+    """
+    quote = urllib_parse.quote_plus if plus else urllib_parse.quote
+    return quote(utf8(value))
+
  
  # python 3 changed things around enough that we need two separate
  # implementations of url_unescape.  We also need our own implementation
  # of parse_qs since python 3's version insists on decoding everything.
  if sys.version_info[0] < 3:
-    def url_unescape(value, encoding='utf-8'):
+    def url_unescape(value, encoding='utf-8', plus=True):
          """Decodes the given value from a URL.
  
          The argument may be either a byte or unicode string.
  
          If encoding is None, the result will be a byte string.  Otherwise,
          the result is a unicode string in the specified encoding.
+
+        If ``plus`` is true (the default), plus signs will be interpreted
+        as spaces (literal plus signs must be represented as "%2B").  This
+        is appropriate for query strings and form-encoded values but not
+        for the path component of a URL.  Note that this default is the
+        reverse of Python's urllib module.
          """
+        unquote = (urllib_parse.unquote_plus if plus else urllib_parse.unquote)
          if encoding is None:
-            return urllib_parse.unquote_plus(utf8(value))
+            return unquote(utf8(value))
          else:
-            return unicode_type(urllib_parse.unquote_plus(utf8(value)), encoding)
+            return unicode_type(unquote(utf8(value)), encoding)
  
      parse_qs_bytes = _parse_qs
  else:
-    def url_unescape(value, encoding='utf-8'):
+    def url_unescape(value, encoding='utf-8', plus=True):
          """Decodes the given value from a URL.
  
          The argument may be either a byte or unicode string.
  
          If encoding is None, the result will be a byte string.  Otherwise,
          the result is a unicode string in the specified encoding.
+
+        If ``plus`` is true (the default), plus signs will be interpreted
+        as spaces (literal plus signs must be represented as "%2B").  This
+        is appropriate for query strings and form-encoded values but not
+        for the path component of a URL.  Note that this default is the
+        reverse of Python's urllib module.
          """
          if encoding is None:
+            if plus:
+                # unquote_to_bytes doesn't have a _plus variant
+                value = to_basestring(value).replace('+', ' ')
              return urllib_parse.unquote_to_bytes(value)
          else:
-            return urllib_parse.unquote_plus(to_basestring(value), encoding=encoding)
+            unquote = (urllib_parse.unquote_plus if plus
+                       else urllib_parse.unquote)
+            return unquote(to_basestring(value), encoding=encoding)
  
      def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
          """Parses a query string like urlparse.parse_qs, but returns the
diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py

index 8b4522c0cc699ca8bf1230f1687470fac33a9ada..90573c166e40639ad081ca66f71226b89bdb8a31 100644 (file)
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -151,7 +151,7 @@ class EscapeTestCase(unittest.TestCase):
              self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
              self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
  
-    def test_url_escape(self):
+    def test_url_escape_unicode(self):
          tests = [
              # byte strings are passed through as-is
              (u('\u00e9').encode('utf8'), '%C3%A9'),
@@ -163,7 +163,7 @@ class EscapeTestCase(unittest.TestCase):
          for unescaped, escaped in tests:
              self.assertEqual(url_escape(unescaped), escaped)
  
-    def test_url_unescape(self):
+    def test_url_unescape_unicode(self):
          tests = [
              ('%C3%A9', u('\u00e9'), 'utf8'),
              ('%C3%A9', u('\u00c3\u00a9'), 'latin1'),
@@ -176,6 +176,19 @@ class EscapeTestCase(unittest.TestCase):
              self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
              self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
  
+    def test_url_escape_quote_plus(self):
+        unescaped = '+ #%'
+        plus_escaped = '%2B+%23%25'
+        escaped = '%2B%20%23%25'
+        self.assertEqual(url_escape(unescaped), plus_escaped)
+        self.assertEqual(url_escape(unescaped, plus=False), escaped)
+        self.assertEqual(url_unescape(plus_escaped), unescaped)
+        self.assertEqual(url_unescape(escaped, plus=False), unescaped)
+        self.assertEqual(url_unescape(plus_escaped, encoding=None),
+                         utf8(unescaped))
+        self.assertEqual(url_unescape(escaped, encoding=None, plus=False),
+                         utf8(unescaped))
+
      def test_escape_return_types(self):
          # On python2 the escape methods should generally return the same
          # type as their argument
author	Ben Darnell <ben@bendarnell.com>
	Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)
committer	Ben Darnell <ben@bendarnell.com>
	Sun, 12 May 2013 19:48:52 +0000 (15:48 -0400)
tornado/escape.py		patch \| blob \| blame \| history
tornado/test/escape_test.py		patch \| blob \| blame \| history