gh-133197: Improve error message for incompatible string / bytes prefixes (#133242)

author sobolevn <mail@sobolevn.me>

Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)

committer GitHub <noreply@github.com>

Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)
author sobolevn <mail@sobolevn.me>
Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)
committer GitHub <noreply@github.com>
Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py

index a10d1fd5fd2b1fda7372df7e9e2193e98d8e7e8f..dd58e032a8befe01610993b7bdaf9d3ac4151370 100644 (file)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1304,7 +1304,7 @@ x = (
                               "Bf''",
                               "BF''",]
          double_quote_cases = [case.replace("'", '"') for case in single_quote_cases]
-        self.assertAllRaise(SyntaxError, 'invalid syntax',
+        self.assertAllRaise(SyntaxError, 'prefixes are incompatible',
                              single_quote_cases + double_quote_cases)
  
      def test_leading_trailing_spaces(self):
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py

index c0681bccd9ec32a2bc436702a277c2a621a56699..c39565144bf7f404ad8e3c5f6d273ae1aa580444 100644 (file)
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -216,6 +216,27 @@ the \'lazy\' dog.\n\
  '
          self.assertEqual(x, y)
  
+    def test_string_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), str)
+            self.assertGreater(len(parsed), 0)
+
+        check("u'abc'")
+        check("r'abc\t'")
+        check("rf'abc\a {1 + 1}'")
+        check("fr'abc\a {1 + 1}'")
+
+    def test_bytes_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), bytes)
+            self.assertGreater(len(parsed), 0)
+
+        check("b'abc'")
+        check("br'abc\t'")
+        check("rb'abc\a'")
+
      def test_ellipsis(self):
          x = ...
          self.assertTrue(x is Ellipsis)
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py

index 74e52ac8fd5e7dca172c45c6ccffe8e03ae06aef..0ee17849e2812172eed58715ef954b39111fe333 100644 (file)
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1877,21 +1877,77 @@ SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' ins
  Traceback (most recent call last):
  SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' instead of '='?
  
->>> ft'abc'
+>>> ub''
  Traceback (most recent call last):
-SyntaxError: can't use 'f' and 't' string prefixes together
+SyntaxError: 'u' and 'b' prefixes are incompatible
  
->>> tf"{x=}"
+>>> bu"привет"
  Traceback (most recent call last):
-SyntaxError: can't use 'f' and 't' string prefixes together
+SyntaxError: 'u' and 'b' prefixes are incompatible
  
->>> tb''
+>>> ur''
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> ru"\t"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> uf'{1 + 1}'
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> fu""
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> ut'{1}'
+Traceback (most recent call last):
+SyntaxError: 'u' and 't' prefixes are incompatible
+
+>>> tu"234"
+Traceback (most recent call last):
+SyntaxError: 'u' and 't' prefixes are incompatible
+
+>>> bf'{x!r}'
+Traceback (most recent call last):
+SyntaxError: 'b' and 'f' prefixes are incompatible
+
+>>> fb"text"
  Traceback (most recent call last):
-SyntaxError: can't use 'b' and 't' string prefixes together
+SyntaxError: 'b' and 'f' prefixes are incompatible
  
  >>> bt"text"
  Traceback (most recent call last):
-SyntaxError: can't use 'b' and 't' string prefixes together
+SyntaxError: 'b' and 't' prefixes are incompatible
+
+>>> tb''
+Traceback (most recent call last):
+SyntaxError: 'b' and 't' prefixes are incompatible
+
+>>> tf"{0.3:.02f}"
+Traceback (most recent call last):
+SyntaxError: 'f' and 't' prefixes are incompatible
+
+>>> ft'{x=}'
+Traceback (most recent call last):
+SyntaxError: 'f' and 't' prefixes are incompatible
+
+>>> tfu"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> turf"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> burft"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'b' prefixes are incompatible
+
+>>> brft"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'b' and 'f' prefixes are incompatible
  
  >>> t'{x}' = 42
  Traceback (most recent call last):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst

new file mode 100644 (file)

index 0000000..009bc37
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst
@@ -0,0 +1,2 @@
+Improve :exc:`SyntaxError` error messages for incompatible string / bytes
+prefixes.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c

index 98adf7447c56269c170508b5877735f6f8c493bb..4d10bccf0a53f2fb08ca01dee7c3c76921a37360 100644 (file)
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -404,6 +404,51 @@ tok_continuation_line(struct tok_state *tok) {
      return c;
  }
  
+static int
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+                                             int saw_b, int saw_r, int saw_u,
+                                             int saw_f, int saw_t) {
+    // Supported: rb, rf, rt (in any order)
+    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
+    do {                                                                  \
+        (void)_PyTokenizer_syntaxerror_known_range(                       \
+            tok, (int)(tok->start + 1 - tok->line_start),                 \
+            (int)(tok->cur - tok->line_start),                            \
+            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
+        return -1;                                                        \
+    } while (0)
+
+    if (saw_u && saw_b) {
+        RETURN_SYNTAX_ERROR("u", "b");
+    }
+    if (saw_u && saw_r) {
+        RETURN_SYNTAX_ERROR("u", "r");
+    }
+    if (saw_u && saw_f) {
+        RETURN_SYNTAX_ERROR("u", "f");
+    }
+    if (saw_u && saw_t) {
+        RETURN_SYNTAX_ERROR("u", "t");
+    }
+
+    if (saw_b && saw_f) {
+        RETURN_SYNTAX_ERROR("b", "f");
+    }
+    if (saw_b && saw_t) {
+        RETURN_SYNTAX_ERROR("b", "t");
+    }
+
+    if (saw_f && saw_t) {
+        RETURN_SYNTAX_ERROR("f", "t");
+    }
+
+#undef RETURN_SYNTAX_ERROR
+
+    return 0;
+}
+
  static int
  tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
  {
@@ -648,22 +693,22 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
          /* Process the various legal combinations of b"", r"", u"", and f"". */
          int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
          while (1) {
-            if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+            if (!saw_b && (c == 'b' || c == 'B')) {
                  saw_b = 1;
+            }
              /* Since this is a backwards compatibility support literal we don't
                 want to support it in arbitrary order like byte literals. */
-            else if (!(saw_b || saw_u || saw_r || saw_f || saw_t)
-                     && (c == 'u'|| c == 'U')) {
+            else if (!saw_u && (c == 'u'|| c == 'U')) {
                  saw_u = 1;
              }
              /* ur"" and ru"" are not supported */
-            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+            else if (!saw_r && (c == 'r' || c == 'R')) {
                  saw_r = 1;
              }
-            else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+            else if (!saw_f && (c == 'f' || c == 'F')) {
                  saw_f = 1;
              }
-            else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) {
+            else if (!saw_t && (c == 't' || c == 'T')) {
                  saw_t = 1;
              }
              else {
@@ -671,17 +716,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
              }
              c = tok_nextc(tok);
              if (c == '"' || c == '\'') {
-                if (saw_b && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'b' and 't' string prefixes together"));
-                }
-                if (saw_f && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'f' and 't' string prefixes together"));
+                // Raise error on incompatible string prefixes:
+                int status = maybe_raise_syntax_error_for_string_prefixes(
+                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+                if (status < 0) {
+                    return MAKE_TOKEN(ERRORTOKEN);
                  }
  
                  // Handle valid f or t string creation:
author	sobolevn <mail@sobolevn.me>
	Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)
committer	GitHub <noreply@github.com>
	Fri, 2 May 2025 11:28:17 +0000 (14:28 +0300)
Lib/test/test_fstring.py		patch | blob | blame | history
Lib/test/test_grammar.py		patch | blob | blame | history
Lib/test/test_syntax.py		patch | blob | blame | history
Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst	[new file with mode: 0644]	patch | blob
Parser/lexer/lexer.c		patch | blob | blame | history