New method to_basestring replaces some use of native_str.

author Ben Darnell <ben@bendarnell.com>

Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)

committer Ben Darnell <ben@bendarnell.com>

Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)
author Ben Darnell <ben@bendarnell.com>
Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)
committer Ben Darnell <ben@bendarnell.com>
Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)
diff --git a/tornado/escape.py b/tornado/escape.py

index 3fa602be140f7f0e521eb32fc8aac451859b1e17..14039a6899c38ff104bbc3a9f7a4aedac878fb32 100644 (file)
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -59,7 +59,7 @@ except:
  
  def xhtml_escape(value):
      """Escapes a string so it is valid within XML or XHTML."""
-    return xml.sax.saxutils.escape(native_str(value), {'"': "&quot;"})
+    return xml.sax.saxutils.escape(to_basestring(value), {'"': "&quot;"})
  
  
  def xhtml_unescape(value):
@@ -80,7 +80,7 @@ def json_encode(value):
  
  def json_decode(value):
      """Returns Python objects for the given JSON string."""
-    return _json_decode(native_str(value))
+    return _json_decode(to_basestring(value))
  
  
  def squeeze(value):
@@ -122,7 +122,7 @@ else:
          if encoding is None:
              return urllib.parse.unquote_to_bytes(value)
          else:
-            return urllib.unquote_plus(native_str(value), encoding=encoding)
+            return urllib.unquote_plus(to_basestring(value), encoding=encoding)
  
      def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
          """Parses a query string like urlparse.parse_qs, but returns the
@@ -178,6 +178,20 @@ if str is unicode:
  else:
      native_str = utf8
  
+_BASESTRING_TYPES = (basestring, type(None))
+def to_basestring(value):
+    """Converts a string argument to a subclass of basestring.
+
+    In python2, byte and unicode strings are mostly interchangeable,
+    so functions that deal with a user-supplied argument in combination
+    with ascii string constants can use either and should return the type
+    the user supplied.  In python3, the two types are not interchangeable,
+    so this method is needed to convert byte strings to unicode.
+    """
+    if isinstance(value, _BASESTRING_TYPES):
+        return value
+    assert isinstance(value, bytes)
+    return value.decode("utf-8")
  
  def recursive_unicode(obj):
      """Walks a simple data structure, converting byte strings to unicode.
diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py

index 15aeb64ea26a0c522d9b3e2d33bf2ecb09ae0349..5904a540cbd7f427c670bfdd4b9027f587b2d1fd 100644 (file)
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -3,7 +3,7 @@
  import tornado.escape
  import unittest
  
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode
  from tornado.util import b
  
  linkify_tests = [
@@ -165,3 +165,18 @@ class EscapeTestCase(unittest.TestCase):
              # and unicode strings.
              self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
              self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
+
+    def test_escape_return_types(self):
+        # On python2 the escape methods should generally return the same
+        # type as their argument
+        self.assertEqual(type(xhtml_escape("foo")), str)
+        self.assertEqual(type(xhtml_escape(u"foo")), unicode)
+
+    def test_json_decode(self):
+        # json_decode accepts both bytes and unicode, but strings it returns
+        # are always unicode.
+        self.assertEqual(json_decode(b('"foo"')), u"foo")
+        self.assertEqual(json_decode(u'"foo"'), u"foo")
+
+        # Non-ascii bytes are interpreted as utf8
+        self.assertEqual(json_decode(utf8(u'"\u00e9"')), u"\u00e9")
author	Ben Darnell <ben@bendarnell.com>
	Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)
committer	Ben Darnell <ben@bendarnell.com>
	Thu, 16 Jun 2011 02:34:49 +0000 (19:34 -0700)
tornado/escape.py		patch | blob | blame | history
tornado/test/escape_test.py		patch | blob | blame | history