[3.13] gh-135148: Correctly handle f/t strings with comments and debug expressions...

author Lysandros Nikolaou <lisandrosnik@gmail.com>

Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)

committer GitHub <noreply@github.com>

Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)
author Lysandros Nikolaou <lisandrosnik@gmail.com>
Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)
committer GitHub <noreply@github.com>
Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py

index f1fa26c20ab5ae333c3f1701a863142d353f18e4..c2ab603a8a775cfcd0692a1b15eb623d2dec8cb7 100644 (file)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1641,6 +1641,18 @@ x = (
          self.assertEqual(f"{1+2 = # my comment
    }", '1+2 = \n  3')
  
+        self.assertEqual(f'{""" # booo
+  """=}', '""" # booo\n  """=\' # booo\\n  \'')
+
+        self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
+        self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
+
+        self.assertEqual(f'{ # some comment goes here
+  """hello"""=}',  ' \n  """hello"""=\'hello\'')
+        self.assertEqual(f'{"""# this is not a comment
+        a""" # this is a comment
+        }', '# this is not a comment\n        a')
+
          # These next lines contains tabs.  Backslash escapes don't
          # work in f-strings.
          # patchcheck doesn't like these tabs.  So the only way to test
diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst b/Misc/NEWS.d/next/Core and Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst

new file mode 100644 (file)

index 0000000..9b1f624
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
@@ -0,0 +1,3 @@
+Fixed a bug where f-string debug expressions (using =) would incorrectly
+strip out parts of strings containing escaped quotes and # characters. Patch
+by Pablo Galindo.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c

index f7e2359604410931bf49eca14a9f2d5f37781031..d29b0bbd25d2abfb3d46ee6e1e3c647e70d0c5ca 100644 (file)
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -118,38 +118,88 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
      }
      PyObject *res = NULL;
  
-    // Check if there is a # character in the expression
+    // Look for a # character outside of string literals
      int hash_detected = 0;
+    int in_string = 0;
+    char quote_char = 0;
+
      for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
-        if (tok_mode->last_expr_buffer[i] == '#') {
+        char ch = tok_mode->last_expr_buffer[i];
+
+        // Skip escaped characters
+        if (ch == '\\') {
+            i++;
+            continue;
+        }
+
+        // Handle quotes
+        if (ch == '"' || ch == '\'') {
+            // The following if/else block works because there is an odd number
+            // of quotes in STRING tokens and the lexer only ever reaches this
+            // function with valid STRING tokens.
+            // For example: """hello"""
+            // First quote: in_string = 1
+            // Second quote: in_string = 0
+            // Third quote: in_string = 1
+            if (!in_string) {
+                in_string = 1;
+                quote_char = ch;
+            }
+            else if (ch == quote_char) {
+                in_string = 0;
+            }
+            continue;
+        }
+
+        // Check for # outside strings
+        if (ch == '#' && !in_string) {
              hash_detected = 1;
              break;
          }
      }
-
+    // If we found a # character in the expression, we need to handle comments
      if (hash_detected) {
-        Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
-        char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
+        // Allocate buffer for processed result
+        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
          if (!result) {
              return -1;
          }
  
-        Py_ssize_t i = 0;
-        Py_ssize_t j = 0;
+        Py_ssize_t i = 0;  // Input position
+        Py_ssize_t j = 0;  // Output position
+        in_string = 0;     // Whether we're in a string
+        quote_char = 0;    // Current string quote char
  
-        for (i = 0, j = 0; i < input_length; i++) {
-            if (tok_mode->last_expr_buffer[i] == '#') {
-                // Skip characters until newline or end of string
-                while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
-                    if (tok_mode->last_expr_buffer[i] == '\n') {
-                        result[j++] = tok_mode->last_expr_buffer[i];
-                        break;
-                    }
+        // Process each character
+        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+            char ch = tok_mode->last_expr_buffer[i];
+
+            // Handle string quotes
+            if (ch == '"' || ch == '\'') {
+                // See comment above to understand this part
+                if (!in_string) {
+                    in_string = 1;
+                    quote_char = ch;
+                } else if (ch == quote_char) {
+                    in_string = 0;
+                }
+                result[j++] = ch;
+            }
+            // Skip comments
+            else if (ch == '#' && !in_string) {
+                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
+                       tok_mode->last_expr_buffer[i] != '\n') {
                      i++;
                  }
-            } else {
-                result[j++] = tok_mode->last_expr_buffer[i];
+                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+                    result[j++] = '\n';
+                }
+            }
+            // Copy other chars
+            else {
+                result[j++] = ch;
              }
+            i++;
          }
  
          result[j] = '\0';  // Null-terminate the result string
@@ -161,11 +211,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
              tok_mode->last_expr_size - tok_mode->last_expr_end,
              NULL
          );
-
      }
  
-
-   if (!res) {
+    if (!res) {
          return -1;
      }
      token->metadata = res;
author	Lysandros Nikolaou <lisandrosnik@gmail.com>
	Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)
committer	GitHub <noreply@github.com>
	Mon, 21 Jul 2025 14:54:29 +0000 (16:54 +0200)
Lib/test/test_fstring.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst	[new file with mode: 0644]	patch \| blob
Parser/lexer/lexer.c		patch \| blob \| blame \| history