From: Serhiy Storchaka Date: Wed, 16 Aug 2023 08:36:36 +0000 (+0300) Subject: [3.11] gh-100061: Proper fix of the bug in the matching of possessive quantifiers... X-Git-Tag: v3.11.5~64 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=26137e2cf70b31e93b8bb26287647915bcaa5c99;p=thirdparty%2FPython%2Fcpython.git [3.11] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) (GH-108004) Restore the global Input Stream pointer after trying to match a sub-pattern. Co-authored-by: Ma Lin (cherry picked from commit abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9) Co-authored-by: SKO <41810398+uyw4687@users.noreply.github.com> --- diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index e30740b9c30b..d8e0d2fdefdc 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,13 +100,6 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) - elif op is POSSESSIVE_REPEAT: - # gh-106052: Possessive quantifiers do not work when the - # subpattern contains backtracking, i.e. "(?:ab?c)*+". - # Implement it as equivalent greedy qualifier in atomic group. - p = [(MAX_REPEAT, av)] - p = [(ATOMIC_GROUP, p)] - _compile(code, p, flags) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op,)) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index d2736dae20b1..f4d64dc9fcf2 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2397,6 +2397,16 @@ class ReTests(unittest.TestCase): self.assertFalse(template_re1.match('nope')) def test_bug_gh106052(self): + # gh-100061 + self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2)) + # gh-106052 self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2)) @@ -2502,7 +2512,6 @@ ATOMIC_GROUP 17: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2515,7 +2524,6 @@ POSSESSIVE_REPEAT 0 1 12: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst new file mode 100644 index 000000000000..dfed34f6ae97 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst @@ -0,0 +1,2 @@ +Fix a bug that causes wrong matches for regular expressions with possessive +qualifier. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index fb4c18b63d64..e83149825e2c 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1334,6 +1334,10 @@ dispatch: MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); + /* Restore the global Input Stream pointer + since it can change after jumps. */ + state->ptr = ptr; + /* We have sufficient matches, so exit loop. */ break; }