From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Sun, 23 Apr 2023 23:21:27 +0000 (-0700) Subject: [3.11] gh-102310: Change error range for invalid bytes literals (GH-103663) (#103703) X-Git-Tag: v3.11.4~182 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7b2ac6cf3dd33d591d0e14a020c84aa331d29932;p=thirdparty%2FPython%2Fcpython.git [3.11] gh-102310: Change error range for invalid bytes literals (GH-103663) (#103703) --- diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 50168d9200a2..81456d62a37c 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1803,6 +1803,30 @@ x: *b Traceback (most recent call last): ... SyntaxError: invalid syntax + +Invalid bytes literals: + + >>> b"Ā" + Traceback (most recent call last): + ... + b"Ā" + ^^^ + SyntaxError: bytes can only contain ASCII literal characters + + >>> b"абвгде" + Traceback (most recent call last): + ... + b"абвгде" + ^^^^^^^^ + SyntaxError: bytes can only contain ASCII literal characters + + >>> b"abc ъющый" # first 3 letters are ascii + Traceback (most recent call last): + ... + b"abc ъющый" + ^^^^^^^^^^^ + SyntaxError: bytes can only contain ASCII literal characters + """ import re diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-21-17-03-14.gh-issue-102310.anLjDx.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-21-17-03-14.gh-issue-102310.anLjDx.rst new file mode 100644 index 000000000000..15cb6c64adba --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-21-17-03-14.gh-issue-102310.anLjDx.rst @@ -0,0 +1 @@ +Change the error range for invalid bytes literals. diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 4f3f046673cb..fb2b9808af15 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -259,7 +259,8 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, const char *ch; for (ch = s; *ch; ch++) { if (Py_CHARMASK(*ch) >= 0x80) { - RAISE_SYNTAX_ERROR( + RAISE_SYNTAX_ERROR_KNOWN_LOCATION( + t, "bytes can only contain ASCII " "literal characters"); return -1;