From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Apr 2022 14:35:21 +0000 (-0700) Subject: Add more tests for group names and refs in RE (GH-91695) X-Git-Tag: v3.9.13~103 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=63af7b3b11d93b6636afb83c91d7eb7d2558ba20;p=thirdparty%2FPython%2Fcpython.git Add more tests for group names and refs in RE (GH-91695) (cherry picked from commit 74070085da5322ac83c954f101f2caa150655be2) Co-authored-by: Serhiy Storchaka --- diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 48a609b5a003..56e98b7aedce 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -217,6 +217,16 @@ class ReTests(unittest.TestCase): re.compile(r'(?Px)(?P=a)(?(a)y)') re.compile(r'(?Px)(?P=a1)(?(a1)y)') re.compile(r'(?Px)\1(?(1)y)') + re.compile(b'(?Px)(?P=a1)(?(a1)y)') + # New valid identifiers in Python 3 + re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') + re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') + # Support > 100 groups. + pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?(200)z|t)' % pat + self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + + def test_symbolic_groups_errors(self): self.checkPatternError(r'(?P)(?P)', "redefinition of group name 'a' as group 2; " "was group 1") @@ -242,16 +252,22 @@ class ReTests(unittest.TestCase): self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) - # New valid/invalid identifiers in Python 3 - re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') - re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) + self.checkPatternError('(?P=©)', "bad character in group name '©'", 4) + self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3) + + def test_symbolic_refs(self): + self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') + self.assertEqual(re.sub(b'(?Px)', br'\g', b'xx'), b'xx') + # New valid identifiers in Python 3 + self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') + self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') # Support > 100 groups. pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - pat = '(?:%s)(?(200)z|t)' % pat - self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') - def test_symbolic_refs(self): + def test_symbolic_refs_errors(self): self.checkTemplateError('(?Px)', r'\g, unterminated name', 3) self.checkTemplateError('(?Px)', r'\g<', 'xx', @@ -269,18 +285,14 @@ class ReTests(unittest.TestCase): 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?Px)', r'\g', 'xx') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') self.checkTemplateError('(?Px)', r'\g<-1>', 'xx', "bad character in group name '-1'", 3) - # New valid/invalid identifiers in Python 3 - self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') - self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') self.checkTemplateError('(?Px)', r'\g<©>', 'xx', "bad character in group name '©'", 3) - # Support > 100 groups. - pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') + self.checkTemplateError('(?Px)', r'\g<㊀>', 'xx', + "bad character in group name '㊀'", 3) + self.checkTemplateError('(?Px)', r'\g<¹>', 'xx', + "bad character in group name '¹'", 3) def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -542,9 +554,23 @@ class ReTests(unittest.TestCase): pat = '(?:%s)(?(200)z)' % pat self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) - self.checkPatternError(r'(?P)(?(0))', 'bad group number', 10) + def test_re_groupref_exists_errors(self): + self.checkPatternError(r'(?P)(?(0)a|b)', 'bad group number', 10) + self.checkPatternError(r'()(?(-1)a|b)', + "bad character in group name '-1'", 5) + self.checkPatternError(r'()(?(㊀)a|b)', + "bad character in group name '㊀'", 5) + self.checkPatternError(r'()(?(¹)a|b)', + "bad character in group name '¹'", 5) + self.checkPatternError(r'()(?(1', + "missing ), unterminated name", 5) + self.checkPatternError(r'()(?(1)a', + "missing ), unterminated subpattern", 2) self.checkPatternError(r'()(?(1)a|b', 'missing ), unterminated subpattern', 2) + self.checkPatternError(r'()(?(1)a|b|c', + 'conditional backref with more than ' + 'two branches', 10) self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two branches', 10)