for case in valid_cases:
compile(case, "<string>", "exec")
+ def test_raw_fstring_format_spec(self):
+ # Test raw f-string format spec behavior (Issue #137314, a 3.12 regression).
+ #
+ # Raw f-strings should preserve literal backslashes in format specifications,
+ # not interpret them as escape sequences.
+ class UnchangedFormat:
+ """Test helper whose __format__ returns the format spec unchanged,
+ so each assertion below observes exactly the spec string that the
+ parser produced."""
+ def __format__(self, format):
+ return format
+
+ # Test basic escape sequences: the non-raw form decodes \xFF to the
+ # single character U+00FF ('ÿ'); the raw form must keep the four
+ # literal characters backslash, 'x', 'F', 'F'.
+ self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
+ self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')
+
+ # Test nested expressions with raw/non-raw combinations: a string
+ # literal nested inside the format spec keeps its OWN raw/non-raw
+ # prefix, independent of the enclosing f-string's prefix.
+ self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
+ self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+ self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+
+ # Test continuation character in format specs: a backslash-newline
+ # inside a nested expression is a line continuation (joining 'a' and
+ # 'b'), in both the raw and non-raw variants.
+ self.assertEqual(f"""{UnchangedFormat():{'a'\
+ 'b'}}""", 'ab')
+ self.assertEqual(rf"""{UnchangedFormat():{'a'\
+ 'b'}}""", 'ab')
+
+ # Test multiple format specs in same raw f-string: every spec in the
+ # string must stay raw, not just the first one.
+ self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')
+
if __name__ == '__main__':
    # Running this module directly executes the full test suite.
    unittest.main()
--- /dev/null
+Fixed a regression where raw f-strings incorrectly interpreted escape
+sequences in their format specifications. Raw f-strings now preserve
+literal backslashes in format specs, restoring the behavior of Python 3.11.
+For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead
+of ``'ÿ'``. Patch by Pablo Galindo.
#include <Python.h>
#include "pegen.h"
+#include "lexer/state.h"
#include "string_parser.h"
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pystate.h" // _PyInterpreterState_GET()
if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
return NULL;
}
- PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
+
+ // Check if we're inside a raw f-string for format spec decoding
+ int is_raw = 0;
+ if (INSIDE_FSTRING(p->tok)) {
+ tokenizer_mode *mode = TOK_GET_MODE(p->tok);
+ is_raw = mode->f_string_raw;
+ }
+
+ PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
if (str == NULL) {
return NULL;
}
|| c == '_'\
|| (c >= 128))
-#ifdef Py_DEBUG
-static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
- assert(tok->tok_mode_stack_index >= 0);
- assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
- return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
-}
-static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
- assert(tok->tok_mode_stack_index >= 0);
- assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
- return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
-}
-#else
-#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
-#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
-#endif
-
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
#ifndef _PY_LEXER_H_
#define _PY_LEXER_H_
+#include "Python.h"
#include "object.h"
#define MAXINDENT 100 /* Max indentation level */
void _PyToken_Free(struct token *);
void _PyToken_Init(struct token *);
+/* Accessors for the tokenizer's f-string mode stack.  Defined here in the
+   header (rather than inside the lexer's .c file) so that the parser can
+   also query the current mode — e.g. to decide whether a format spec must
+   be decoded as a raw string.  Debug builds use checked inline functions;
+   release builds use equivalent macros. */
+#ifdef Py_DEBUG
+/* Return the currently active tokenizer mode (top of the mode stack). */
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+/* Advance to and return the next tokenizer mode (pre-increments the
+   stack index; the bound is checked before the increment). */
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+#else
+/* Release-mode equivalents.  The macro parameter is parenthesized so any
+   pointer expression can be passed safely (CERT PRE01-C); note `tok` is
+   still evaluated twice, so avoid arguments with side effects. */
+#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
+#endif
#endif