git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 2 Oct 2023 14:54:16 +0000 (07:54 -0700)
committer: GitHub <noreply@github.com>
Mon, 2 Oct 2023 14:54:16 +0000 (16:54 +0200)
gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081)

It now points to the invalid non-ASCII character, not to the valid numerical literal.
(cherry picked from commit b2729e93e9d73503b1fda4ea4fecd77c58909091)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Lib/test/test_grammar.py
Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst [new file with mode: 0644]
Parser/tokenizer.c

diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index b2415d579145f55d67e5bf8a6ee3a3e79671f27c..ad9f6c764319a9511e1519cd8ce6e5bf4733ea63 100644 (file)
@@ -236,6 +236,10 @@ class TokenTests(unittest.TestCase):
             check(f"[{num}for x in ()]")
             check(f"{num}spam", error=True)
 
+            # gh-88943: Invalid non-ASCII character following a numerical literal.
+            with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"):
+                compile(f"{num}⁄7", "<testcase>", "eval")
+
             with self.assertWarnsRegex(SyntaxWarning, r'invalid \w+ literal'):
                 compile(f"{num}is x", "<testcase>", "eval")
             with warnings.catch_warnings():
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst
new file mode 100644 (file)
index 0000000..a99830f
--- /dev/null
@@ -0,0 +1,3 @@
+Improve syntax error for non-ASCII character that follows a numerical
+literal. It now points on the invalid non-ASCII character, not on the valid
+numerical literal.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index c4c345e4c358e5c18421091619162791db36a867..9911fa55d47c9fc260eebd5ecbd47cc0dcdc25ae 100644 (file)
@@ -1648,7 +1648,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
         tok_nextc(tok);
     }
     else /* In future releases, only error will remain. */
-    if (is_potential_identifier_char(c)) {
+    if (c < 128 && is_potential_identifier_char(c)) {
         tok_backup(tok, c);
         syntaxerror(tok, "invalid %s literal", kind);
         return 0;