git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-100061: Proper fix of the bug in the matching of possessive quantifiers...
author: Serhiy Storchaka <storchaka@gmail.com>
Wed, 16 Aug 2023 10:00:55 +0000 (13:00 +0300)
committer: GitHub <noreply@github.com>
Wed, 16 Aug 2023 10:00:55 +0000 (12:00 +0200)
Restore the global input stream pointer (state->ptr) after trying to match a
sub-pattern.

(cherry picked from commit abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9)

Co-authored-by: SKO <41810398+uyw4687@users.noreply.github.com>
Lib/re/_compiler.py
Lib/test/test_re.py
Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst [new file with mode: 0644]
Modules/_sre/sre_lib.h

index e30740b9c30b0e8cc95d907195f0b99d7d71cfae..d8e0d2fdefdccad7150fe369a673733a60754ca1 100644 (file)
@@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
                 emit(ANY_ALL)
             else:
                 emit(ANY)
-        elif op is POSSESSIVE_REPEAT:
-            # gh-106052: Possessive quantifiers do not work when the
-            # subpattern contains backtracking, i.e. "(?:ab?c)*+".
-            # Implement it as equivalent greedy qualifier in atomic group.
-            p = [(MAX_REPEAT, av)]
-            p = [(ATOMIC_GROUP, p)]
-            _compile(code, p, flags)
         elif op in REPEATING_CODES:
             if flags & SRE_FLAG_TEMPLATE:
                 raise error("internal: unsupported template operator %r" % (op,))
index 85541f4451d031985245ee5ac94ce37682cacda0..5a5de523eba0526a78c7feea0ef5c5690f69c1b4 100644 (file)
@@ -2366,6 +2366,16 @@ class ReTests(unittest.TestCase):
         self.assertFalse(template_re1.match('nope'))
 
     def test_bug_gh106052(self):
+        # gh-100061
+        self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
+        self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
+        self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
+        # gh-106052
         self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
         self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
         self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
@@ -2471,7 +2481,6 @@ ATOMIC_GROUP
 17: SUCCESS
 ''')
 
-    @unittest.expectedFailure  # gh-106052
     def test_possesive_repeat_one(self):
         self.assertEqual(get_debug_out(r'a?+'), '''\
 POSSESSIVE_REPEAT 0 1
@@ -2484,7 +2493,6 @@ POSSESSIVE_REPEAT 0 1
 12: SUCCESS
 ''')
 
-    @unittest.expectedFailure  # gh-106052
     def test_possesive_repeat(self):
         self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
 POSSESSIVE_REPEAT 0 1
diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst
new file mode 100644 (file)
index 0000000..dfed34f
--- /dev/null
@@ -0,0 +1,2 @@
+Fix a bug that causes wrong matches for regular expressions with a possessive
+quantifier.
index fb4c18b63d643d23a900dde0a4d903b8c9631d05..e83149825e2cdb899162ebb79ad2fc5cf3d6ae9f 100644 (file)
@@ -1334,6 +1334,10 @@ dispatch:
                     MARK_POP(ctx->lastmark);
                     LASTMARK_RESTORE();
 
+                    /* Restore the global Input Stream pointer
+                       since it can change after jumps. */
+                    state->ptr = ptr;
+
                     /* We have sufficient matches, so exit loop. */
                     break;
                 }