re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
re.compile(r'(?P<a1>x)\1(?(1)y)')
+ re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
+ # New valid identifiers in Python 3
+ re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+ re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ pat = '(?:%s)(?(200)z|t)' % pat
+ self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
+
+ def test_symbolic_groups_errors(self):
self.checkPatternError(r'(?P<a>)(?P<a>)',
"redefinition of group name 'a' as group 2; "
"was group 1")
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
- # New valid/invalid identifiers in Python 3
- re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
- re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
+ self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
+ self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
+
+ def test_symbolic_refs(self):
+ self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
+ self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
+ self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
+ # New valid identifiers in Python 3
+ self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+ self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
- pat = '(?:%s)(?(200)z|t)' % pat
- self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
+ self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
- def test_symbolic_refs(self):
+ def test_symbolic_refs_errors(self):
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
'missing >, unterminated name', 3)
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
- self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
- self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
"bad character in group name '-1'", 3)
- # New valid/invalid identifiers in Python 3
- self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
- self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
"bad character in group name '©'", 3)
- # Support > 100 groups.
- pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
- self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
+ self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
+ "bad character in group name '㊀'", 3)
+ self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
+ "bad character in group name '¹'", 3)
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
pat = '(?:%s)(?(200)z)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
- self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
+ def test_re_groupref_exists_errors(self):
+ self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
+ self.checkPatternError(r'()(?(-1)a|b)',
+ "bad character in group name '-1'", 5)
+ self.checkPatternError(r'()(?(㊀)a|b)',
+ "bad character in group name '㊀'", 5)
+ self.checkPatternError(r'()(?(¹)a|b)',
+ "bad character in group name '¹'", 5)
+ self.checkPatternError(r'()(?(1',
+ "missing ), unterminated name", 5)
+ self.checkPatternError(r'()(?(1)a',
+ "missing ), unterminated subpattern", 2)
self.checkPatternError(r'()(?(1)a|b',
'missing ), unterminated subpattern', 2)
+ self.checkPatternError(r'()(?(1)a|b|c',
+ 'conditional backref with more than '
+ 'two branches', 10)
self.checkPatternError(r'()(?(1)a|b|c)',
'conditional backref with more than '
'two branches', 10)