From: Ben Darnell <ben@bendarnell.com>
Date: Thu, 16 Jun 2011 02:34:49 +0000 (-0700)
Subject: New method to_basestring replaces some use of native_str.
X-Git-Tag: v2.0.0~14
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db579dad715b99bc70687a5f5b5e319eb7dbcaba;p=thirdparty%2Ftornado.git

New method to_basestring replaces some use of native_str.

native_str would force the argument to (utf8) bytes, while in python2
it is often more appropriate to preserve the type of the input data.

Closes #280
---

diff --git a/tornado/escape.py b/tornado/escape.py
index 3fa602be1..14039a689 100644
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -59,7 +59,7 @@ except:
 
 def xhtml_escape(value):
     """Escapes a string so it is valid within XML or XHTML."""
-    return xml.sax.saxutils.escape(native_str(value), {'"': "&quot;"})
+    return xml.sax.saxutils.escape(to_basestring(value), {'"': "&quot;"})
 
 
 def xhtml_unescape(value):
@@ -80,7 +80,7 @@ def json_encode(value):
 
 def json_decode(value):
     """Returns Python objects for the given JSON string."""
-    return _json_decode(native_str(value))
+    return _json_decode(to_basestring(value))
 
 
 def squeeze(value):
@@ -122,7 +122,7 @@ else:
         if encoding is None:
             return urllib.parse.unquote_to_bytes(value)
         else:
-            return urllib.unquote_plus(native_str(value), encoding=encoding)
+            return urllib.unquote_plus(to_basestring(value), encoding=encoding)
 
     def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
         """Parses a query string like urlparse.parse_qs, but returns the
@@ -178,6 +178,20 @@ if str is unicode:
 else:
     native_str = utf8
 
+_BASESTRING_TYPES = (basestring, type(None))
+def to_basestring(value):
+    """Converts a string argument to a subclass of basestring.
+
+    In python2 byte and unicode strings are mostly interchangeable, so code
+    mixing a user-supplied argument with ascii constants should return the
+    type it was given.  In python3 they are not, so byte strings are decoded
+    as utf8.  None is returned unchanged; other types raise TypeError.
+    """
+    if isinstance(value, _BASESTRING_TYPES):
+        return value
+    if not isinstance(value, bytes):
+        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
+    return value.decode("utf-8")
 
 def recursive_unicode(obj):
     """Walks a simple data structure, converting byte strings to unicode.
diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py
index 15aeb64ea..5904a540c 100644
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -3,7 +3,7 @@
 import tornado.escape
 import unittest
 
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode
 from tornado.util import b
 
 linkify_tests = [
@@ -165,3 +165,18 @@ class EscapeTestCase(unittest.TestCase):
             # and unicode strings.
             self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
             self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
+
+    def test_escape_return_types(self):
+        # On python2 the escape methods should generally return the same
+        # type as their argument; checked here for xhtml_escape with str/unicode.
+        self.assertEqual(type(xhtml_escape("foo")), str)
+        self.assertEqual(type(xhtml_escape(u"foo")), unicode)
+
+    def test_json_decode(self):
+        # json_decode accepts both bytes and unicode and should always return
+        # unicode strings (NOTE: assertEqual checks value only, not the type).
+        self.assertEqual(json_decode(b('"foo"')), u"foo")
+        self.assertEqual(json_decode(u'"foo"'), u"foo")
+
+        # Non-ascii bytes are interpreted as utf8
+        self.assertEqual(json_decode(utf8(u'"\u00e9"')), u"\u00e9")