bpo-46503: Prevent an assert from firing when parsing some invalid \N sequences in f-strings.

author Eric V. Smith <ericvsmith@users.noreply.github.com>

Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)

committer GitHub <noreply@github.com>

Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)
author Eric V. Smith <ericvsmith@users.noreply.github.com>
Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)
committer GitHub <noreply@github.com>
Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py

index bd1ca943c7c09407828be82b9ee8bc01a5848ff2..d0b1ade15137babaf2fac8284c8cc88637375828 100644 (file)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -746,12 +746,16 @@ x = (
          # differently inside f-strings.
          self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
                              [r"f'\N'",
+                             r"f'\N '",
+                             r"f'\N  '",  # See bpo-46503.
                               r"f'\N{'",
                               r"f'\N{GREEK CAPITAL LETTER DELTA'",
  
                               # Here are the non-f-string versions,
                               #  which should give the same errors.
                               r"'\N'",
+                             r"'\N '",
+                             r"'\N  '",
                               r"'\N{'",
                               r"'\N{GREEK CAPITAL LETTER DELTA'",
                               ])
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst

new file mode 100644 (file)

index 0000000..e48028d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst
@@ -0,0 +1 @@
+Fix an assert when parsing some invalid \N escape sequences in f-strings.
diff --git a/Parser/string_parser.c b/Parser/string_parser.c

index 57d9b9ed3fdbbd3bb5566466e6f9aa5d9cb2f992..0b5e30ba2ca6a4cf5049a2f5f4132b8ba3823488 100644 (file)
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -442,12 +442,23 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
          if (!raw && ch == '\\' && s < end) {
              ch = *s++;
              if (ch == 'N') {
+                /* We need to look at and skip matching braces for "\N{name}"
+                   sequences because otherwise we'll think the opening '{'
+                   starts an expression, which is not the case with "\N".
+                   Keep looking for either a matched '{' '}' pair, or the end
+                   of the string. */
+
                  if (s < end && *s++ == '{') {
                      while (s < end && *s++ != '}') {
                      }
                      continue;
                  }
-                break;
+
+                /* This is an invalid "\N" sequence, since it's a "\N" not
+                   followed by a "{".  Just keep parsing this literal.  This
+                   error will be caught later by
+                   decode_unicode_with_escapes(). */
+                continue;
              }
              if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
                  return -1;
@@ -491,7 +502,8 @@ done:
              *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                      s - literal_start,
                                                      NULL, NULL);
-        } else {
+        }
+        else {
              *literal = decode_unicode_with_escapes(p, literal_start,
                                                     s - literal_start, t);
          }
author	Eric V. Smith <ericvsmith@users.noreply.github.com>
	Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)
committer	GitHub <noreply@github.com>
	Tue, 25 Jan 2022 02:53:27 +0000 (21:53 -0500)
Lib/test/test_fstring.py		patch | blob | blame | history
Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst	[new file with mode: 0644]	patch | blob
Parser/string_parser.c		patch | blob | blame | history