From: Umar Butler <8473183+umarbutler@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:00:54 +0000 (+1100) Subject: gh-128016: Improved invalid escape sequence warning message (#128020) X-Git-Tag: v3.14.0a5~414 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8d8b854824c4723d7c5924f1d5c6a397ea7214a5;p=thirdparty%2FPython%2Fcpython.git gh-128016: Improved invalid escape sequence warning message (#128020) --- diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index f30107225ff6..527d51857fc9 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -659,7 +659,8 @@ class CmdLineTest(unittest.TestCase): stderr.splitlines()[-3:], [ b' foo = """\\q"""', b' ^^^^^^^^', - b'SyntaxError: invalid escape sequence \'\\q\'' + b'SyntaxError: "\\q" is an invalid escape sequence. ' + b'Did you mean "\\\\q"? A raw string is also an option.' ], ) diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py index 787bd1b6a79e..0eefc22d11bc 100644 --- a/Lib/test/test_codeop.py +++ b/Lib/test/test_codeop.py @@ -282,7 +282,7 @@ class CodeopTests(unittest.TestCase): # Test that the warning is only returned once. with warnings_helper.check_warnings( ('"is" with \'str\' literal', SyntaxWarning), - ("invalid escape sequence", SyntaxWarning), + ('"\\\\e" is an invalid escape sequence', SyntaxWarning), ) as w: compile_command(r"'\e' is 0") self.assertEqual(len(w.warnings), 2) diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py index c7c6f684cd33..f56195ca2767 100644 --- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -116,7 +116,9 @@ class TestLiterals(unittest.TestCase): warnings.simplefilter('always', category=SyntaxWarning) eval("'''\n\\z'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(str(w[0].message), r'"\z" is an invalid escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -126,7 +128,8 @@ class TestLiterals(unittest.TestCase): eval("'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) self.assertEqual(exc.offset, 1) @@ -153,7 +156,9 @@ class TestLiterals(unittest.TestCase): eval("'''\n\\407'''") self.assertEqual(len(w), 1) self.assertEqual(str(w[0].message), - r"invalid octal escape sequence '\407'") + r'"\407" is an invalid octal escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -163,7 +168,8 @@ class TestLiterals(unittest.TestCase): eval("'''\n\\407'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) self.assertEqual(exc.offset, 1) @@ -205,7 +211,9 @@ class TestLiterals(unittest.TestCase): warnings.simplefilter('always', category=SyntaxWarning) eval("b'''\n\\z'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(str(w[0].message), r'"\z" is an invalid escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -215,7 +223,8 @@ class TestLiterals(unittest.TestCase): eval("b'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) @@ -228,8 +237,9 @@ class TestLiterals(unittest.TestCase): warnings.simplefilter('always', category=SyntaxWarning) eval("b'''\n\\407'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), - r"invalid octal escape sequence '\407'") + self.assertEqual(str(w[0].message), r'"\407" is an invalid octal escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -239,7 +249,8 @@ class TestLiterals(unittest.TestCase): eval("b'''\n\\407'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 35394f29fbe4..332919540da4 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -651,7 +651,9 @@ class CosmeticTestCase(ASTTestCase): def test_backslash_in_format_spec(self): import re - msg = re.escape("invalid escape sequence '\\ '") + msg = re.escape('"\\ " is an invalid escape sequence. ' + 'Such sequences will not work in the future. ' + 'Did you mean "\\\\ "? A raw string is also an option.') with self.assertWarnsRegex(SyntaxWarning, msg): self.check_ast_roundtrip("""f"{x:\\ }" """) self.check_ast_roundtrip("""f"{x:\\n}" """) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst new file mode 100644 index 000000000000..0832d777bc32 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst @@ -0,0 +1 @@ +Improved the ``SyntaxWarning`` message for invalid escape sequences to clarify that such sequences will raise a ``SyntaxError`` in future Python releases. The new message also suggests a potential fix, i.e., ``Did you mean "\\e"?``. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 024653546563..b3d1c425ad18 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1205,7 +1205,8 @@ PyObject *PyBytes_DecodeEscape(const char *s, unsigned char c = *first_invalid_escape; if ('4' <= c && c <= '7') { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid octal escape sequence '\\%.3s'", + "b\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. ", first_invalid_escape) < 0) { Py_DECREF(result); @@ -1214,7 +1215,8 @@ PyObject *PyBytes_DecodeEscape(const char *s, } else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", + "b\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. ", c) < 0) { Py_DECREF(result); @@ -1223,7 +1225,6 @@ PyObject *PyBytes_DecodeEscape(const char *s, } } return result; - } /* -------------------------------------------------------------------- */ /* object api */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3eafa2381c1a..d9952f764bb1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6853,7 +6853,8 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, unsigned char c = *first_invalid_escape; if ('4' <= c && c <= '7') { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid octal escape sequence '\\%.3s'", + "\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. ", first_invalid_escape) < 0) { Py_DECREF(result); @@ -6862,7 +6863,8 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, } else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. ", c) < 0) { Py_DECREF(result); diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 9537c543b0eb..9dd8f9ef28bd 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -28,9 +28,16 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token int octal = ('4' <= c && c <= '7'); PyObject *msg = octal - ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", - first_invalid_escape) - : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); + ? PyUnicode_FromFormat( + "\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%.3s\"? A raw string is also an option.", + first_invalid_escape, first_invalid_escape) + : PyUnicode_FromFormat( + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + c, c); if (msg == NULL) { return -1; } @@ -53,11 +60,16 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token error location, if p->known_err_token is not set. */ p->known_err_token = t; if (octal) { - RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", - first_invalid_escape); + RAISE_SYNTAX_ERROR( + "\"\\%.3s\" is an invalid octal escape sequence. " + "Did you mean \"\\\\%.3s\"? A raw string is also an option.", + first_invalid_escape, first_invalid_escape); } else { - RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); + RAISE_SYNTAX_ERROR( + "\"\\%c\" is an invalid escape sequence. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + c, c); } } Py_DECREF(msg); diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index 9c9d05bbef0f..5a416adb875a 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -113,7 +113,10 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval } PyObject *msg = PyUnicode_FromFormat( - "invalid escape sequence '\\%c'", + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + (char) first_invalid_escape_char, (char) first_invalid_escape_char ); @@ -129,7 +132,12 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval /* Replace the SyntaxWarning exception with a SyntaxError to get a more accurate error report */ PyErr_Clear(); - return _PyTokenizer_syntaxerror(tok, "invalid escape sequence '\\%c'", (char) first_invalid_escape_char); + + return _PyTokenizer_syntaxerror(tok, + "\"\\%c\" is an invalid escape sequence. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + (char) first_invalid_escape_char, + (char) first_invalid_escape_char); } return -1;