Add tornado.util.u and a fixer script to start using it.

author Ben Darnell <ben@bendarnell.com>

Sun, 13 Jan 2013 23:10:01 +0000 (18:10 -0500)

committer Ben Darnell <ben@bendarnell.com>

Sun, 13 Jan 2013 23:40:18 +0000 (18:40 -0500)
author Ben Darnell <ben@bendarnell.com>
Sun, 13 Jan 2013 23:10:01 +0000 (18:10 -0500)
committer Ben Darnell <ben@bendarnell.com>
Sun, 13 Jan 2013 23:40:18 +0000 (18:40 -0500)
diff --git a/maint/scripts/custom_fixers/fix_unicode_literal.py b/maint/scripts/custom_fixers/fix_unicode_literal.py

new file mode 100644 (file)

index 0000000..cfc6b3a
--- /dev/null
+++ b/maint/scripts/custom_fixers/fix_unicode_literal.py
@@ -0,0 +1,17 @@
+import re
+from lib2to3.pgen2 import token
+from lib2to3 import fixer_base
+from lib2to3.fixer_util import Name, Call
+
+_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
+
+class FixUnicodeLiteral(fixer_base.BaseFix):
+    BM_compatible = True
+    PATTERN = """STRING"""
+
+    def transform(self, node, results):
+        if node.type == token.STRING and _literal_re.match(node.value):
+            new = node.clone()
+            new.value = new.value[1:]
+            new.prefix = ''
+            node.replace(Call(Name(u'u', prefix=node.prefix), [new]))
diff --git a/tornado/test/util_test.py b/tornado/test/util_test.py

index 581c4d814e2cf7d5ab82a537ec29a03a5bfd406a..aa1bf09bf09f7cf7ed935834f7453605562f5a98 100644 (file)
--- a/tornado/test/util_test.py
+++ b/tornado/test/util_test.py
@@ -1,7 +1,9 @@
+# coding: utf-8
  from __future__ import absolute_import, division, with_statement
  import sys
  
-from tornado.util import raise_exc_info, Configurable
+from tornado.escape import utf8
+from tornado.util import raise_exc_info, Configurable, u, b
  from tornado.test.util import unittest
  
  
@@ -112,3 +114,8 @@ class ConfigurableTest(unittest.TestCase):
          # args bound in configure don't apply when using the subclass directly
          obj = TestConfig2()
          self.assertIs(obj.b, None)
+
+
+class UnicodeLiteralTest(unittest.TestCase):
+    def test_unicode_escapes(self):
+        self.assertEqual(utf8(u('\u00e9')), b('\xc3\xa9'))
diff --git a/tornado/util.py b/tornado/util.py

index f550449a0725adb6fb32e3d8ee99211caf459a0a..0dda16506f7c38dec533f93a98cf5720f554493f 100644 (file)
--- a/tornado/util.py
+++ b/tornado/util.py
@@ -68,13 +68,23 @@ def import_object(name):
  # to convert our string literals.  b() should only be applied to literal
  # latin1 strings.  Once we drop support for 2.5, we can remove this function
  # and just use byte literals.
+#
+# Fake unicode literal support:  Python 3.0-3.2 do not accept the u''
+# prefix on string literals, and alternatives such as "from __future__
+# import unicode_literals" have other problems (see PEP 414).  u() may be
+# applied to ascii-only literals containing \u escapes; the literal itself
+# must not contain non-ascii characters.
  if str is unicode:
      def b(s):
          return s.encode('latin1')
+    def u(s):
+        return s
      bytes_type = bytes
  else:
      def b(s):
          return s
+    def u(s):
+        return s.decode('unicode_escape')
      bytes_type = str
author	Ben Darnell <ben@bendarnell.com>
	Sun, 13 Jan 2013 23:10:01 +0000 (18:10 -0500)
committer	Ben Darnell <ben@bendarnell.com>
	Sun, 13 Jan 2013 23:40:18 +0000 (18:40 -0500)
maint/scripts/custom_fixers/fix_unicode_literal.py	[new file with mode: 0644]	patch | blob
tornado/test/util_test.py		patch | blob | blame | history
tornado/util.py		patch | blob | blame | history