git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665)
author Serhiy Storchaka <storchaka@gmail.com>
Fri, 22 Apr 2022 15:35:28 +0000 (18:35 +0300)
committer GitHub <noreply@github.com>
Fri, 22 Apr 2022 15:35:28 +0000 (18:35 +0300)
re.error is now raised instead of TypeError.

Lib/re/_parser.py
Lib/test/test_re.py
Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst [new file with mode: 0644]

index f191f809a1491e4512f9dc15d17649dc137bdae7..6588862493077090ff95f382213da4a577c7271a 100644 (file)
@@ -333,7 +333,7 @@ def _class_escape(source, escape):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}')) from None
             return LITERAL, c
@@ -393,7 +393,7 @@ def _escape(source, escape, state):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}')) from None
             return LITERAL, c
index 781bfd6ea2edacdac77359adaea7239e1a79b972..2d3fef8589e2a3db289830074790a2c8d2cfc5f5 100644 (file)
@@ -772,6 +772,10 @@ class ReTests(unittest.TestCase):
                                "undefined character name 'SPAM'", 0)
         self.checkPatternError(r'[\N{SPAM}]',
                                "undefined character name 'SPAM'", 1)
+        self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 0)
+        self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 1)
         self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
         self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
 
diff --git a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
new file mode 100644 (file)
index 0000000..4411c71
--- /dev/null
@@ -0,0 +1,3 @@
+Parsing ``\N`` escapes of Unicode Named Character Sequences in a
+:mod:`regular expression <re>` now raises :exc:`re.error` instead of
+``TypeError``.