Add tornado.escape.native_str() for dealing with Python 2 vs. 3 issues.

author Ben Darnell <ben@bendarnell.com>

Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)

committer Ben Darnell <ben@bendarnell.com>

Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)
author Ben Darnell <ben@bendarnell.com>
Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)
committer Ben Darnell <ben@bendarnell.com>
Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)
diff --git a/tornado/escape.py b/tornado/escape.py

index 42c5ad76408c1d2e6f0f0ce6d9f8c8c2bb9b8402..5bd627abd0a3b4ca830f00b6e5e20dfd62a557df 100644 (file)
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -94,11 +94,39 @@ def url_unescape(value):
  
  _UTF8_TYPES = (bytes, type(None))
  def utf8(value):
+    """Converts a string argument to a byte string.
+
+    If the argument is already a byte string or None, it is returned unchanged.
+    Otherwise it must be a unicode string and is encoded as utf8.
+    """
      if isinstance(value, _UTF8_TYPES):
          return value
      assert isinstance(value, unicode)
      return value.encode("utf-8")
  
+_TO_UNICODE_TYPES = (unicode, type(None))
+def to_unicode(value):
+    """Converts a string argument to a unicode string.
+
+    If the argument is already a unicode string or None, it is returned
+    unchanged.  Otherwise it must be a byte string and is decoded as utf8.
+    """
+    if isinstance(value, _TO_UNICODE_TYPES):
+        return value
+    assert isinstance(value, bytes)
+    return value.decode("utf-8")
+
+# to_unicode was previously named _unicode not because it was private,
+# but to avoid conflicts with the built-in unicode() function/type
+_unicode = to_unicode
+
+# When dealing with the standard library across python 2 and 3 it is
+# sometimes useful to have a direct conversion to the native string type
+if str is unicode:
+    native_str = to_unicode
+else:
+    native_str = utf8
+
  
  # I originally used the regex from 
  # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
@@ -187,13 +215,6 @@ def linkify(text, shorten=False, extra_params="",
      return _URL_RE.sub(make_link, text)
  
  
-def _unicode(value):
-    if isinstance(value, bytes):
-        return value.decode("utf-8")
-    assert isinstance(value, unicode)
-    return value
-
-
  def _convert_entity(m):
      if m.group(1) == "#":
          try:
author	Ben Darnell <ben@bendarnell.com>
	Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)
committer	Ben Darnell <ben@bendarnell.com>
	Sun, 15 May 2011 23:58:29 +0000 (16:58 -0700)