git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.9] gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665)...
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Fri, 22 Apr 2022 18:34:31 +0000 (11:34 -0700)
committer GitHub <noreply@github.com>
Fri, 22 Apr 2022 18:34:31 +0000 (21:34 +0300)
re.error is now raised instead of TypeError.
(cherry picked from commit 6ccfa31421393910b52936e0447625db06f2a655)
(cherry picked from commit 9c18d783c38fca57a63b61aa778d8a8d18945d95)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst [new file with mode: 0644]

index 53706676e9f7b8ae2dff7ebce28e226d328fd37b..d3ff196032b300051ee4c9fda7f3d3a967925677 100644 (file)
@@ -330,7 +330,7 @@ def _class_escape(source, escape):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}'))
             return LITERAL, c
@@ -390,7 +390,7 @@ def _escape(source, escape, state):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}'))
             return LITERAL, c
index 56e98b7aedce7c82ea3b77e60e0d87b91a908b2a..007064093c4d19f54c7060e36f7f8df9f365a342 100644 (file)
@@ -753,6 +753,10 @@ class ReTests(unittest.TestCase):
                                "undefined character name 'SPAM'", 0)
         self.checkPatternError(r'[\N{SPAM}]',
                                "undefined character name 'SPAM'", 1)
+        self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 0)
+        self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 1)
         self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
         self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
 
diff --git a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
new file mode 100644 (file)
index 0000000..4411c71
--- /dev/null
@@ -0,0 +1,3 @@
+Parsing ``\N`` escapes of Unicode Named Character Sequences in a
+:mod:`regular expression <re>` raises now :exc:`re.error` instead of
+``TypeError``.