(1)
As in Standard C, up to three octal digits are accepted.
+ .. versionchanged:: 3.11
+ Octal escapes with a value larger than ``0o377`` produce a :exc:`DeprecationWarning`.
+ In a future Python version they will produce a :exc:`SyntaxWarning` and
+ eventually raise a :exc:`SyntaxError`.
+
(2)
Unlike in Standard C, exactly two hex digits are required.
Deprecated
==========
+* Octal escapes with a value larger than ``0o377`` now produce
+ a :exc:`DeprecationWarning`.
+ In a future Python version they will produce a :exc:`SyntaxWarning` and
+ eventually raise a :exc:`SyntaxError`.
+ (Contributed by Serhiy Storchaka in :gh:`81548`.)
+
* The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not
be able to parse Python 3.10 or newer. See the :pep:`617` (New PEG parser for
CPython). (Contributed by Victor Stinner in :issue:`40360`.)
check(br"[\418]", b"[!8]")
check(br"[\101]", b"[A]")
check(br"[\1010]", b"[A0]")
- check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]")
check(br"[\x410]", b"[A0]")
for i in range(97, 123):
check(br"\9", b"\\9")
with self.assertWarns(DeprecationWarning):
check(b"\\\xfa", b"\\\xfa")
+ for i in range(0o400, 0o1000):
+ with self.assertWarns(DeprecationWarning):
+ check(rb'\%o' % i, bytes([i & 0o377]))
def test_errors(self):
decode = codecs.escape_decode
check(br"\9", "\\9")
with self.assertWarns(DeprecationWarning):
check(b"\\\xfa", "\\\xfa")
+ for i in range(0o400, 0o1000):
+ with self.assertWarns(DeprecationWarning):
+ check(rb'\%o' % i, chr(i))
def test_decode_errors(self):
decode = codecs.unicode_escape_decode
warnings.simplefilter('always', category=DeprecationWarning)
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)
eval("'''\n\\z'''")
exc = cm.exception
self.assertEqual(w, [])
+ self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
+ self.assertEqual(exc.filename, '<string>')
+ self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.offset, 1)
+
+ def test_eval_str_invalid_octal_escape(self):  # octal escapes above \377 in str literals
+ for i in range(0o400, 0o1000):  # every three-digit octal escape from \400 to \777
+ with self.assertWarns(DeprecationWarning):
+ self.assertEqual(eval(r"'\%o'" % i), chr(i))  # expected result is the code point i
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always', category=DeprecationWarning)
+ eval("'''\n\\407'''")  # the escape follows a newline inside the literal
+ self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message),
+ r"invalid octal escape sequence '\407'")
+ self.assertEqual(w[0].filename, '<string>')
+ self.assertEqual(w[0].lineno, 1)
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('error', category=DeprecationWarning)  # escalate the warning to an error
+ with self.assertRaises(SyntaxError) as cm:  # the test expects SyntaxError, not DeprecationWarning
+ eval("'''\n\\407'''")
+ exc = cm.exception
+ self.assertEqual(w, [])  # no warning may be recorded alongside the exception
+ self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)
self.assertEqual(exc.offset, 1)
warnings.simplefilter('always', category=DeprecationWarning)
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)
eval("b'''\n\\z'''")
exc = cm.exception
self.assertEqual(w, [])
+ self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
+ self.assertEqual(exc.filename, '<string>')
+ self.assertEqual(exc.lineno, 1)
+
+ def test_eval_bytes_invalid_octal_escape(self):  # octal escapes above \377 in bytes literals
+ for i in range(0o400, 0o1000):  # every three-digit octal escape from \400 to \777
+ with self.assertWarns(DeprecationWarning):
+ self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))  # expected byte is the low 8 bits of i
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always', category=DeprecationWarning)
+ eval("b'''\n\\407'''")  # the escape follows a newline inside the literal
+ self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message),
+ r"invalid octal escape sequence '\407'")
+ self.assertEqual(w[0].filename, '<string>')
+ self.assertEqual(w[0].lineno, 1)
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('error', category=DeprecationWarning)  # escalate the warning to an error
+ with self.assertRaises(SyntaxError) as cm:  # the test expects SyntaxError, not DeprecationWarning
+ eval("b'''\n\\407'''")
+ exc = cm.exception
+ self.assertEqual(w, [])  # no warning may be recorded alongside the exception
+ self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)
--- /dev/null
+Octal escapes with a value larger than ``0o377`` now produce a
+:exc:`DeprecationWarning`. In a future Python version they will produce a
+:exc:`SyntaxWarning` and eventually raise a :exc:`SyntaxError`.
if (s < end && '0' <= *s && *s <= '7')
c = (c<<3) + *s++ - '0';
}
+ if (c > 0377) {
+ if (*first_invalid_escape == NULL) {
+ *first_invalid_escape = s-3; /* Back up 3 chars, since we've
+ already incremented s. */
+ }
+ }
*p++ = c;
break;
case 'x':
if (result == NULL)
return NULL;
if (first_invalid_escape != NULL) {
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'",
- (unsigned char)*first_invalid_escape) < 0) {
- Py_DECREF(result);
- return NULL;
+ unsigned char c = *first_invalid_escape;
+ if ('4' <= c && c <= '7') {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid octal escape sequence '\\%.3s'",
+ first_invalid_escape) < 0)
+ {
+ Py_DECREF(result);
+ return NULL;
+ }
+ }
+ else {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid escape sequence '\\%c'",
+ c) < 0)
+ {
+ Py_DECREF(result);
+ return NULL;
+ }
}
}
return result;
ch = (ch<<3) + *s++ - '0';
}
}
+ if (ch > 0377) {
+ if (*first_invalid_escape == NULL) {
+ *first_invalid_escape = s-3; /* Back up 3 chars, since we've
+ already incremented s. */
+ }
+ }
WRITE_CHAR(ch);
continue;
if (result == NULL)
return NULL;
if (first_invalid_escape != NULL) {
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'",
- (unsigned char)*first_invalid_escape) < 0) {
- Py_DECREF(result);
- return NULL;
+ unsigned char c = *first_invalid_escape;
+ if ('4' <= c && c <= '7') {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid octal escape sequence '\\%.3s'",
+ first_invalid_escape) < 0)
+ {
+ Py_DECREF(result);
+ return NULL;
+ }
+ }
+ else {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid escape sequence '\\%c'",
+ c) < 0)
+ {
+ Py_DECREF(result);
+ return NULL;
+ }
}
}
return result;
//// STRING HANDLING FUNCTIONS ////
static int
-warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t)
+warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
{
+ unsigned char c = *first_invalid_escape;
+ int octal = ('4' <= c && c <= '7');
PyObject *msg =
- PyUnicode_FromFormat("invalid escape sequence '\\%c'", first_invalid_escape_char);
+ octal
+ ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
+ first_invalid_escape)
+ : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
if (msg == NULL) {
return -1;
}
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
error location, if p->known_err_token is not set. */
p->known_err_token = t;
- RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", first_invalid_escape_char);
+ if (octal) {
+ RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
+ first_invalid_escape);
+ }
+ else {
+ RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
+ }
}
Py_DECREF(msg);
return -1;
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
if (v != NULL && first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
+ if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
/* We have not decref u before because first_invalid_escape points
inside u. */
Py_XDECREF(u);
}
if (first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
+ if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
Py_DECREF(result);
return NULL;
}
break;
}
}
-
+
if (s == expr_end) {
if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') {
RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
decode_unicode_with_escapes(). */
continue;
}
- if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
+ if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
return -1;
}
}