git.ipfire.org Git - thirdparty/tornado.git/commitdiff
Add tornado.util.u and a fixer script to start using it.
author Ben Darnell <ben@bendarnell.com>
Sun, 13 Jan 2013 23:10:01 +0000 (18:10 -0500)
committer Ben Darnell <ben@bendarnell.com>
Sun, 13 Jan 2013 23:40:18 +0000 (18:40 -0500)
maint/scripts/custom_fixers/fix_unicode_literal.py [new file with mode: 0644]
tornado/test/util_test.py
tornado/util.py

diff --git a/maint/scripts/custom_fixers/fix_unicode_literal.py b/maint/scripts/custom_fixers/fix_unicode_literal.py
new file mode 100644 (file)
index 0000000..cfc6b3a
--- /dev/null
@@ -0,0 +1,17 @@
+import re
+from lib2to3.pgen2 import token
+from lib2to3 import fixer_base
+from lib2to3.fixer_util import Name, Call
+
+_literal_re = re.compile(r"[uU][rR]?[\'\"]")  # plain r"": the ur"" prefix is a SyntaxError on Python 3
+
+class FixUnicodeLiteral(fixer_base.BaseFix):  # lib2to3 fixer: rewrites u'...' as u('...')
+    BM_compatible = True
+    PATTERN = """STRING"""
+
+    def transform(self, node, results):
+        if node.type == token.STRING and _literal_re.match(node.value):  # only u/ur-prefixed literals
+            new = node.clone()
+            new.value = new.value[1:]  # drop the leading u/U; an r/R prefix, if present, is kept
+            new.prefix = ''  # the original prefix is carried by the u name built below
+            node.replace(Call(Name('u', prefix=node.prefix), [new]))  # 'u', not u'u': also parses on 3.0-3.2
index 581c4d814e2cf7d5ab82a537ec29a03a5bfd406a..aa1bf09bf09f7cf7ed935834f7453605562f5a98 100644 (file)
@@ -1,7 +1,9 @@
+# coding: utf-8
 from __future__ import absolute_import, division, with_statement
 import sys
 
-from tornado.util import raise_exc_info, Configurable
+from tornado.escape import utf8
+from tornado.util import raise_exc_info, Configurable, u, b
 from tornado.test.util import unittest
 
 
@@ -112,3 +114,8 @@ class ConfigurableTest(unittest.TestCase):
         # args bound in configure don't apply when using the subclass directly
         obj = TestConfig2()
         self.assertIs(obj.b, None)
+
+
+class UnicodeLiteralTest(unittest.TestCase):
+    def test_unicode_escapes(self):
+        self.assertEqual(utf8(u('\u00e9')), b('\xc3\xa9'))  # u() must yield U+00E9, whose UTF-8 bytes are C3 A9
index f550449a0725adb6fb32e3d8ee99211caf459a0a..0dda16506f7c38dec533f93a98cf5720f554493f 100644 (file)
@@ -68,13 +68,23 @@ def import_object(name):
 # to convert our string literals.  b() should only be applied to literal
 # latin1 strings.  Once we drop support for 2.5, we can remove this function
 # and just use byte literals.
+#
+# Fake unicode literal support:  Python 3.2 doesn't have the u'' marker for
+# literal strings, and alternative solutions like "from __future__ import
+# unicode_literals" have other problems (see PEP 414).  u() can be applied
+# to ascii strings that include \u escapes (but they must not contain
+# literal non-ascii characters).
 if str is unicode:
     def b(s):
         return s.encode('latin1')
+    def u(s):  # str is already the unicode type on this branch, so the literal is returned unchanged
+        return s
     bytes_type = bytes
 else:
     def b(s):
         return s
+    def u(s):  # byte-string literal: decode its backslash escapes (e.g. \u00e9) into a unicode object
+        return s.decode('unicode_escape')
     bytes_type = str