for case in valid_cases:
compile(case, "<string>", "exec")
+ def test_raw_fstring_format_spec(self):
+ # Test raw f-string format spec behavior (Issue #137314, a 3.12 regression).
+ #
+ # Raw f-strings should preserve literal backslashes in format specifications,
+ # not interpret them as escape sequences.
+ class UnchangedFormat:
+ """Test helper whose __format__ returns the format spec unchanged,
+ so each assertion below observes exactly the spec string that the
+ parser produced."""
+ def __format__(self, format):
+ return format
+
+ # Test basic escape sequences: the non-raw form decodes \xFF to the
+ # single character U+00FF ('ÿ'); the raw form must keep the four
+ # literal characters backslash, 'x', 'F', 'F'.
+ self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
+ self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')
+
+ # Test nested expressions with raw/non-raw combinations: a string
+ # literal nested inside the format spec keeps its OWN raw/non-raw
+ # prefix, independent of the enclosing f-string's prefix.
+ self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
+ self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+ self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+
+ # Test continuation character in format specs: a backslash-newline
+ # inside a nested expression is a line continuation (joining 'a' and
+ # 'b'), in both the raw and non-raw variants.
+ self.assertEqual(f"""{UnchangedFormat():{'a'\
+ 'b'}}""", 'ab')
+ self.assertEqual(rf"""{UnchangedFormat():{'a'\
+ 'b'}}""", 'ab')
+
+ # Test multiple format specs in same raw f-string: every spec in the
+ # string must stay raw, not just the first one.
+ self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')
+
if __name__ == '__main__':
    # Running this module directly executes the full test suite.
    unittest.main()
--- /dev/null
+Fixed a regression where raw f-strings incorrectly interpreted escape
+sequences in their format specifications. Raw f-strings now preserve
+literal backslashes in format specs, restoring the behavior of Python 3.11.
+For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead
+of ``'ÿ'``. Patch by Pablo Galindo.
#include <Python.h>
#include "pegen.h"
+#include "lexer/state.h"
#include "string_parser.h"
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pystate.h" // _PyInterpreterState_GET()
if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
return NULL;
}
- PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
+
+ // Check if we're inside a raw f-string for format spec decoding
+ int is_raw = 0;
+ if (INSIDE_FSTRING(p->tok)) {
+ tokenizer_mode *mode = TOK_GET_MODE(p->tok);
+ is_raw = mode->f_string_raw;
+ }
+
+ PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
if (str == NULL) {
return NULL;
}
|| c == '_'\
|| (c >= 128))
-#ifdef Py_DEBUG
-static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
- assert(tok->tok_mode_stack_index >= 0);
- assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
- return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
-}
-static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
- assert(tok->tok_mode_stack_index >= 0);
- assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
- return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
-}
-#else
-#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
-#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
-#endif
-
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
#ifndef _PY_LEXER_H_
#define _PY_LEXER_H_
+#include "Python.h"
#include "object.h"
#define MAXINDENT 100 /* Max indentation level */
void _PyToken_Free(struct token *);
void _PyToken_Init(struct token *);
+/* Accessors for the tokenizer's f-string mode stack.  Defined here in the
+   header (rather than inside the lexer's .c file) so that the parser can
+   also query the current mode — e.g. to decide whether a format spec must
+   be decoded as a raw string.  Debug builds use checked inline functions;
+   release builds use equivalent macros. */
+#ifdef Py_DEBUG
+/* Return the currently active tokenizer mode (top of the mode stack). */
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+/* Advance to and return the next tokenizer mode (pre-increments the
+   stack index; the bound is checked before the increment). */
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+#else
+/* Release-mode equivalents.  The macro parameter is parenthesized so any
+   pointer expression can be passed safely (CERT PRE01-C); note `tok` is
+   still evaluated twice, so avoid arguments with side effects. */
+#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
+#endif
#endif