gh-100445: Improve error message for unterminated strings with escapes (#100446)

author Shantanu <12621235+hauntsaninja@users.noreply.github.com>

Wed, 18 Oct 2023 12:58:51 +0000 (05:58 -0700)

committer GitHub <noreply@github.com>

Wed, 18 Oct 2023 12:58:51 +0000 (13:58 +0100)
author Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Wed, 18 Oct 2023 12:58:51 +0000 (05:58 -0700)
committer GitHub <noreply@github.com>
Wed, 18 Oct 2023 12:58:51 +0000 (13:58 +0100)
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py

index 00c5f624ceb3ab6e43aa4f83478e7e037c8002c0..7ebf9ca1707acd68c9a889266640efbd02d74bf5 100644 (file)
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -2298,8 +2298,14 @@ func(
  
      def test_error_string_literal(self):
  
-        self._check_error("'blech", "unterminated string literal")
-        self._check_error('"blech', "unterminated string literal")
+        self._check_error("'blech", r"unterminated string literal \(.*\)$")
+        self._check_error('"blech', r"unterminated string literal \(.*\)$")
+        self._check_error(
+            r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
+        )
+        self._check_error(
+            r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
+        )
          self._check_error("'''blech", "unterminated triple-quoted string literal")
          self._check_error('"""blech', "unterminated triple-quoted string literal")
  
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst

new file mode 100644 (file)

index 0000000..72f3884
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst
@@ -0,0 +1 @@
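+Improve the error message for an unterminated string literal that contains a backslash-escaped quote: the message now suggests that the end quote may have been escaped.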
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c

index 1a01bb0352a7b110fbb4bd8f4d4a24c547ffb6b2..2ba24a2c2405f222b242cb01cb09e16a4f44f947 100644 (file)
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
          int quote = c;
          int quote_size = 1;             /* 1 or 3 */
          int end_quote_size = 0;
+        int has_escaped_quote = 0;
  
          /* Nodes of type STRING, especially multi line strings
             must be handled differently in order to get both
@@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                      return MAKE_TOKEN(ERRORTOKEN);
                  }
                  else {
-                    _PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at"
-                                     " line %d)", start);
+                    if (has_escaped_quote) {
+                        _PyTokenizer_syntaxerror(
+                            tok,
+                            "unterminated string literal (detected at line %d); "
+                            "perhaps you escaped the end quote?",
+                            start
+                        );
+                    } else {
+                        _PyTokenizer_syntaxerror(
+                            tok, "unterminated string literal (detected at line %d)", start
+                        );
+                    }
                      if (c != '\n') {
                          tok->done = E_EOLS;
                      }
@@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                  end_quote_size = 0;
                  if (c == '\\') {
                      c = tok_nextc(tok);  /* skip escaped char */
+                    if (c == quote) {  /* but record whether the escaped char was a quote */
+                        has_escaped_quote = 1;
+                    }
                      if (c == '\r') {
                          c = tok_nextc(tok);
                      }
author	Shantanu <12621235+hauntsaninja@users.noreply.github.com>
	Wed, 18 Oct 2023 12:58:51 +0000 (05:58 -0700)
committer	GitHub <noreply@github.com>
	Wed, 18 Oct 2023 12:58:51 +0000 (13:58 +0100)
Lib/test/test_syntax.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst	[new file with mode: 0644]	patch \| blob
Parser/lexer/lexer.c		patch \| blob \| blame \| history