git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.10] gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665...
author: Serhiy Storchaka <storchaka@gmail.com>
Fri, 22 Apr 2022 18:08:49 +0000 (21:08 +0300)
committer: GitHub <noreply@github.com>
Fri, 22 Apr 2022 18:08:49 +0000 (21:08 +0300)
re.error is now raised instead of TypeError.
(cherry picked from commit 6ccfa31421393910b52936e0447625db06f2a655)

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst [new file with mode: 0644]

index 53706676e9f7b8ae2dff7ebce28e226d328fd37b..d3ff196032b300051ee4c9fda7f3d3a967925677 100644 (file)
@@ -330,7 +330,7 @@ def _class_escape(source, escape):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}'))
             return LITERAL, c
@@ -390,7 +390,7 @@ def _escape(source, escape, state):
             charname = source.getuntil('}', 'character name')
             try:
                 c = ord(unicodedata.lookup(charname))
-            except KeyError:
+            except (KeyError, TypeError):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}'))
             return LITERAL, c
index 9e5223a125b052e1501b1959ae931ced5cddbfb3..305ec8eef326b83f92f07e4d2fc759893e19bd44 100644 (file)
@@ -754,6 +754,10 @@ class ReTests(unittest.TestCase):
                                "undefined character name 'SPAM'", 0)
         self.checkPatternError(r'[\N{SPAM}]',
                                "undefined character name 'SPAM'", 1)
+        self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 0)
+        self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 1)
         self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
         self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
 
diff --git a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
new file mode 100644 (file)
index 0000000..4411c71
--- /dev/null
@@ -0,0 +1,3 @@
+Parsing ``\N`` escapes of Unicode Named Character Sequences in a
+:mod:`regular expression <re>` now raises :exc:`re.error` instead of
+``TypeError``.