--- /dev/null
+import re
+from lib2to3.pgen2 import token
+from lib2to3 import fixer_base
+from lib2to3.fixer_util import Name, Call
+
+# Matches the start of a unicode string literal token: a `u`/`U` prefix,
+# an optional `r`/`R`, then the opening quote character.
+# NOTE(review): the `ur""` prefix used here is Python 2-only syntax, so this
+# module presumably has to be run under a Python 2 lib2to3 -- confirm.
+_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
+
+# lib2to3 fixer that rewrites u-prefixed string literals such as u'foo'
+# into calls of a function named `u`, i.e. u('foo').
+class FixUnicodeLiteral(fixer_base.BaseFix):
+ BM_compatible = True
+ # Every STRING token is offered to transform(); the prefix check happens there.
+ PATTERN = """STRING"""
+
+ # Replace `node` in the tree with a call u(<literal>) when `node` is a
+ # u-prefixed string literal; other nodes are left untouched.
+ # `results` (the pattern match results) is not used.
+ def transform(self, node, results):
+ if node.type == token.STRING and _literal_re.match(node.value):
+ new = node.clone()
+ # Drop only the first character (the u/U); an `r` prefix, if any, is kept.
+ new.value = new.value[1:]
+ # The leading whitespace/comments move onto the `u` name created below.
+ new.prefix = ''
+ node.replace(Call(Name(u'u', prefix=node.prefix), [new]))
+# coding: utf-8
from __future__ import absolute_import, division, with_statement
import sys
-from tornado.util import raise_exc_info, Configurable
+from tornado.escape import utf8
+from tornado.util import raise_exc_info, Configurable, u, b
from tornado.test.util import unittest
# args bound in configure don't apply when using the subclass directly
obj = TestConfig2()
self.assertIs(obj.b, None)
+
+
+# Test case for the u() fake-unicode-literal helper imported from tornado.util.
+class UnicodeLiteralTest(unittest.TestCase):
+ # Asserts that a \u escape passed through u() and then utf8() equals the
+ # bytes produced by b('\xc3\xa9'); 0xC3 0xA9 is the UTF-8 encoding of U+00E9.
+ def test_unicode_escapes(self):
+ self.assertEqual(utf8(u('\u00e9')), b('\xc3\xa9'))
# to convert our string literals. b() should only be applied to literal
# latin1 strings. Once we drop support for 2.5, we can remove this function
# and just use byte literals.
+#
+# Fake unicode literal support: Python 3.2 doesn't have the u'' marker for
+# literal strings, and alternative solutions like "from __future__ import
+# unicode_literals" have other problems (see PEP 414). u() can be applied
+# to ascii strings that include \u escapes (but they must not contain
+# literal non-ascii characters).
+# Pick the implementations of b(), u() and bytes_type according to whether
+# `str` is the unicode text type.
+# NOTE(review): `unicode` is not a builtin on Python 3, so this comparison
+# presumably relies on 2to3 translation or on a definition outside this
+# view -- confirm.
if str is unicode:
+# str is the text type here: b() encodes the literal to bytes as latin1.
def b(s):
return s.encode('latin1')
+ # The literal is returned unchanged in this branch.
+ def u(s):
+ return s
bytes_type = bytes
else:
+# str is not the unicode type here: b() returns the literal unchanged.
def b(s):
return s
+ # Decode with the 'unicode_escape' codec so \u escapes in the literal
+ # are interpreted.
+ def u(s):
+ return s.decode('unicode_escape')
bytes_type = str