def xhtml_escape(value):
    """Escapes a string so it is valid within XML or XHTML."""
-    return xml.sax.saxutils.escape(native_str(value), {'"': "&quot;"})
+    # saxutils.escape handles &, < and > on its own; the extra mapping
+    # also turns double quotes into the &quot; entity.
+    return xml.sax.saxutils.escape(to_basestring(value), {'"': "&quot;"})
def xhtml_unescape(value):
def json_decode(value):
"""Returns Python objects for the given JSON string."""
- return _json_decode(native_str(value))
+ # to_basestring passes str/unicode through unchanged and utf-8-decodes
+ # byte strings that are not already a basestring (the python3 case).
+ return _json_decode(to_basestring(value))
def squeeze(value):
if encoding is None:
return urllib.parse.unquote_to_bytes(value)
else:
- return urllib.unquote_plus(native_str(value), encoding=encoding)
+ return urllib.unquote_plus(to_basestring(value), encoding=encoding)
def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
"""Parses a query string like urlparse.parse_qs, but returns the
else:
native_str = utf8
+_BASESTRING_TYPES = (basestring, type(None))
+def to_basestring(value):
+    """Converts a string argument to a subclass of basestring.
+
+    In python2, byte and unicode strings are mostly interchangeable,
+    so functions that deal with a user-supplied argument in combination
+    with ascii string constants can use either and should return the type
+    the user supplied.  In python3, the two types are not interchangeable,
+    so this method is needed to convert byte strings to unicode.
+
+    Values that are already basestring instances, and None, are returned
+    unchanged; any other byte string is decoded as utf-8.  Raises
+    TypeError for anything else.
+    """
+    if isinstance(value, _BASESTRING_TYPES):
+        return value
+    # Raise explicitly rather than assert: asserts are stripped under
+    # "python -O", which would let non-string values fall through to
+    # the decode call below.
+    if not isinstance(value, bytes):
+        raise TypeError(
+            "Expected bytes, unicode, or None; got %r" % type(value))
+    return value.decode("utf-8")
def recursive_unicode(obj):
"""Walks a simple data structure, converting byte strings to unicode.
import tornado.escape
import unittest
-from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode
+from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode
from tornado.util import b
linkify_tests = [
# and unicode strings.
self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
+
+    def test_escape_return_types(self):
+        # On python2 the escape methods should generally return the same
+        # type as their argument: a byte str in gives a byte str out and
+        # a unicode string in gives a unicode string out.
+        escaped_bytes = xhtml_escape("foo")
+        self.assertEqual(type(escaped_bytes), str)
+        escaped_text = xhtml_escape(u"foo")
+        self.assertEqual(type(escaped_text), unicode)
+
+    def test_json_decode(self):
+        # Byte-string and unicode input are both accepted; decoded strings
+        # always come back as unicode.
+        from_bytes = json_decode(b('"foo"'))
+        self.assertEqual(from_bytes, u"foo")
+        from_text = json_decode(u'"foo"')
+        self.assertEqual(from_text, u"foo")
+
+        # Bytes outside the ascii range are read as utf8.
+        encoded = utf8(u'"\u00e9"')
+        self.assertEqual(json_decode(encoded), u"\u00e9")