git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-45811: Improve error message when source code contains invisible control characters
author: Pablo Galindo Salgado <Pablogsal@gmail.com>
Sat, 20 Nov 2021 18:28:28 +0000 (18:28 +0000)
committer: GitHub <noreply@github.com>
Sat, 20 Nov 2021 18:28:28 +0000 (18:28 +0000)
Lib/test/test_syntax.py
Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst [new file with mode: 0644]
Parser/tokenizer.c

diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 28414ba59493bb0a9397ab9cb0e7bf7ce66dff0f..fc3c62954a29b3ee0b78beb6339673e5fb7fe2f7 100644 (file)
@@ -1566,6 +1566,9 @@ def func2():
         for paren in ")]}":
             self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
 
+    def test_invisible_characters(self):
+        self._check_error('print\x17("Hello")', "invalid non-printable character")
+
     def test_match_call_does_not_raise_syntax_error(self):
         code = """
 def match(x):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst
new file mode 100644 (file)
index 0000000..4b31414
--- /dev/null
@@ -0,0 +1,2 @@
+Improve the tokenizer errors when encountering invisible control characters
+in the parser. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f281c423d0e0c60a801dc7adb1a39c546d7ab5ae..69d2c08b439262fb8277f7fa0495ca15007f13f4 100644 (file)
@@ -2045,6 +2045,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         break;
     }
 
+    if (!Py_UNICODE_ISPRINTABLE(c)) {
+        char hex[9];
+        (void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
+        return syntaxerror(tok, "invalid non-printable character U+%s", hex);
+    }
+
     /* Punctuation character */
     *p_start = tok->start;
     *p_end = tok->cur;