]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-38870: correctly escape unprintable characters on ast.unparse (GH-20166)
authorCyberSaxosTiGER <cybersaxostiger@gmail.com>
Mon, 18 May 2020 18:41:35 +0000 (21:41 +0300)
committerGitHub <noreply@github.com>
Mon, 18 May 2020 18:41:35 +0000 (19:41 +0100)
Unprintable characters such as `\x00` were not round-tripped correctly
because docstrings were generated without using the default string repr.
This patch escapes all unprintable characters except `\n` and `\t`, which
are commonly used for formatting and usually appear unescaped in the source.

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Lib/ast.py
Lib/test/test_unparse.py

index 0d3b19d922368b161722dc45b6a160af173f26c8..2edb7171e967190bdcba5e108db02687320c037d 100644 (file)
@@ -1090,6 +1090,15 @@ class _Unparser(NodeVisitor):
         self.write(node.id)
 
     def _write_docstring(self, node):
+        def esc_char(c):
+            if c in ("\n", "\t"):
+                # In the AST form, we don't know the author's intention
+                # about how this should be displayed. We'll leave \n and
+                # \t unescaped, because they are more likely to be
+                # unescaped in the source
+                return c
+            return c.encode('unicode_escape').decode('ascii')
+
         self.fill()
         if node.kind == "u":
             self.write("u")
@@ -1097,11 +1106,10 @@ class _Unparser(NodeVisitor):
         value = node.value
         if value:
             # Preserve quotes in the docstring by escaping them
-            value = value.replace("\\", "\\\\")
-            value = value.replace('"""', '""\"')
-            value = value.replace("\r", "\\r")
+            value = "".join(map(esc_char, value))
             if value[-1] == '"':
                 value = value.replace('"', '\\"', -1)
+            value = value.replace('"""', '""\\"')
 
         self.write(f'"""{value}"""')
 
index 67dcb1dae79ff20eb6cf5e7c2c0a46eff8deb049..6d828721b7740e9e09ca86c047373778246927ce 100644 (file)
@@ -324,7 +324,11 @@ class UnparseTestCase(ASTTestCase):
             '\\t',
             '\n',
             '\\n',
-            '\r\\r\t\\t\n\\n'
+            '\r\\r\t\\t\n\\n',
+            '""">>> content = \"\"\"blabla\"\"\" <<<"""',
+            r'foo\n\x00',
+            '🐍⛎𩸽üéş^\X\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
+
         )
         for docstring in docstrings:
             # check as Module docstrings for easy testing