git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-100445: Improve error message for unterminated strings with escapes (#100446)
author Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Wed, 18 Oct 2023 12:58:51 +0000 (05:58 -0700)
committer GitHub <noreply@github.com>
Wed, 18 Oct 2023 12:58:51 +0000 (13:58 +0100)
Lib/test/test_syntax.py
Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst [new file with mode: 0644]
Parser/lexer/lexer.c

diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 00c5f624ceb3ab6e43aa4f83478e7e037c8002c0..7ebf9ca1707acd68c9a889266640efbd02d74bf5 100644 (file)
@@ -2298,8 +2298,14 @@ func(
 
     def test_error_string_literal(self):
 
-        self._check_error("'blech", "unterminated string literal")
-        self._check_error('"blech', "unterminated string literal")
+        self._check_error("'blech", r"unterminated string literal \(.*\)$")
+        self._check_error('"blech', r"unterminated string literal \(.*\)$")
+        self._check_error(
+            r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
+        )
+        self._check_error(
+            r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
+        )
         self._check_error("'''blech", "unterminated triple-quoted string literal")
         self._check_error('"""blech', "unterminated triple-quoted string literal")
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst
new file mode 100644 (file)
index 0000000..72f3884
--- /dev/null
@@ -0,0 +1 @@
+Improve error message for unterminated strings with escapes.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 1a01bb0352a7b110fbb4bd8f4d4a24c547ffb6b2..2ba24a2c2405f222b242cb01cb09e16a4f44f947 100644 (file)
@@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         int quote = c;
         int quote_size = 1;             /* 1 or 3 */
         int end_quote_size = 0;
+        int has_escaped_quote = 0;
 
         /* Nodes of type STRING, especially multi line strings
            must be handled differently in order to get both
@@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
                 else {
-                    _PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at"
-                                     " line %d)", start);
+                    if (has_escaped_quote) {
+                        _PyTokenizer_syntaxerror(
+                            tok,
+                            "unterminated string literal (detected at line %d); "
+                            "perhaps you escaped the end quote?",
+                            start
+                        );
+                    } else {
+                        _PyTokenizer_syntaxerror(
+                            tok, "unterminated string literal (detected at line %d)", start
+                        );
+                    }
                     if (c != '\n') {
                         tok->done = E_EOLS;
                     }
@@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 end_quote_size = 0;
                 if (c == '\\') {
                     c = tok_nextc(tok);  /* skip escaped char */
+                    if (c == quote) {  /* but record whether the escaped char was a quote */
+                        has_escaped_quote = 1;
+                    }
                     if (c == '\r') {
                         c = tok_nextc(tok);
                     }