From: SKO <41810398+uyw4687@users.noreply.github.com> Date: Wed, 16 Aug 2023 07:43:45 +0000 (+0900) Subject: gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) X-Git-Tag: v3.13.0a1~959 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9;p=thirdparty%2FPython%2Fcpython.git gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) Restore the global Input Stream pointer after trying to match a sub-pattern. Co-authored-by: Ma Lin --- diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index f5fd160ba004..d0a4c55caf6e 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,13 +100,6 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) - elif op is POSSESSIVE_REPEAT: - # gh-106052: Possessive quantifiers do not work when the - # subpattern contains backtracking, i.e. "(?:ab?c)*+". - # Implement it as equivalent greedy qualifier in atomic group. - p = [(MAX_REPEAT, av)] - p = [(ATOMIC_GROUP, p)] - _compile(code, p, flags) elif op in REPEATING_CODES: if _simple(av[2]): emit(REPEATING_CODES[op][2]) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index bf3698ac78a8..042f97f57ecf 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2342,7 +2342,17 @@ class ReTests(unittest.TestCase): self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt")) - def test_bug_gh106052(self): + def test_bug_gh100061(self): + # gh-100061 + self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 
0)) + self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2)) + # gh-106052 self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2)) @@ -2451,7 +2461,6 @@ ATOMIC_GROUP 17: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2464,7 +2473,6 @@ POSSESSIVE_REPEAT 0 1 12: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst new file mode 100644 index 000000000000..dfed34f6ae97 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst @@ -0,0 +1,2 @@ +Fix a bug that causes wrong matches for regular expressions with possessive +qualifier. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index c1a774f69090..ae80009fd63b 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1336,6 +1336,10 @@ dispatch: MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); + /* Restore the global Input Stream pointer + since it can change after jumps. */ + state->ptr = ptr; + /* We have sufficient matches, so exit loop. */ break; }