From: Ben Darnell Date: Thu, 16 Jun 2011 02:34:49 +0000 (-0700) Subject: New method to_basestring replaces some use of native_str. X-Git-Tag: v2.0.0~14 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db579dad715b99bc70687a5f5b5e319eb7dbcaba;p=thirdparty%2Ftornado.git New method to_basestring replaces some use of native_str. native_str would force the argument to (utf8) bytes, while in python2 it is often more appropriate to preserve the type of the input data. Closes #280 --- diff --git a/tornado/escape.py b/tornado/escape.py index 3fa602be1..14039a689 100644 --- a/tornado/escape.py +++ b/tornado/escape.py @@ -59,7 +59,7 @@ except: def xhtml_escape(value): """Escapes a string so it is valid within XML or XHTML.""" - return xml.sax.saxutils.escape(native_str(value), {'"': "&quot;"}) + return xml.sax.saxutils.escape(to_basestring(value), {'"': "&quot;"}) def xhtml_unescape(value): @@ -80,7 +80,7 @@ def json_encode(value): def json_decode(value): """Returns Python objects for the given JSON string.""" - return _json_decode(native_str(value)) + return _json_decode(to_basestring(value)) def squeeze(value): @@ -122,7 +122,7 @@ else: if encoding is None: return urllib.parse.unquote_to_bytes(value) else: - return urllib.unquote_plus(native_str(value), encoding=encoding) + return urllib.unquote_plus(to_basestring(value), encoding=encoding) def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False): """Parses a query string like urlparse.parse_qs, but returns the @@ -178,6 +178,20 @@ if str is unicode: else: native_str = utf8 +_BASESTRING_TYPES = (basestring, type(None)) +def to_basestring(value): + """Converts a string argument to a subclass of basestring. + + In python2, byte and unicode strings are mostly interchangeable, + so functions that deal with a user-supplied argument in combination + with ascii string constants can use either and should return the type + the user supplied. 
In python3, the two types are not interchangeable, + so this method is needed to convert byte strings to unicode. + """ + if isinstance(value, _BASESTRING_TYPES): + return value + assert isinstance(value, bytes) + return value.decode("utf-8") def recursive_unicode(obj): """Walks a simple data structure, converting byte strings to unicode. diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py index 15aeb64ea..5904a540c 100644 --- a/tornado/test/escape_test.py +++ b/tornado/test/escape_test.py @@ -3,7 +3,7 @@ import tornado.escape import unittest -from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode +from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode from tornado.util import b linkify_tests = [ @@ -165,3 +165,18 @@ class EscapeTestCase(unittest.TestCase): # and unicode strings. self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped) self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped) + + def test_escape_return_types(self): + # On python2 the escape methods should generally return the same + # type as their argument + self.assertEqual(type(xhtml_escape("foo")), str) + self.assertEqual(type(xhtml_escape(u"foo")), unicode) + + def test_json_decode(self): + # json_decode accepts both bytes and unicode, but strings it returns + # are always unicode. + self.assertEqual(json_decode(b('"foo"')), u"foo") + self.assertEqual(json_decode(u'"foo"'), u"foo") + + # Non-ascii bytes are interpreted as utf8 + self.assertEqual(json_decode(utf8(u'"\u00e9"')), u"\u00e9")