git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH...
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 22 Mar 2022 15:27:55 +0000 (17:27 +0200)
committer GitHub <noreply@github.com>
Tue, 22 Mar 2022 15:27:55 +0000 (17:27 +0200)
Affected functions are re.search(), re.split(), re.findall(), re.finditer()
and re.sub().

Lib/test/test_re.py
Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst [new file with mode: 0644]
Modules/sre_lib.h

index da827ca7c4e925362588e61a4ecc26ca57446b3d..fd6db6a300d037beeb101e36b3eb9b03e12644f2 100644 (file)
@@ -5,6 +5,7 @@ import locale
 import re
 import sre_compile
 import string
+import time
 import unittest
 import warnings
 from re import Scanner
@@ -2038,6 +2039,20 @@ class ReTests(unittest.TestCase):
         with self.assertRaisesRegex(TypeError, "got 'type'"):
             re.search("x*", type)
 
+    def test_search_anchor_at_beginning(self):
+        s = 'x'*10**7  # no 'y' anywhere, so neither pattern matches
+        start = time.perf_counter()
+        for p in r'\Ay', r'^y':
+            self.assertIsNone(re.search(p, s))
+            self.assertEqual(re.split(p, s), [s])
+            self.assertEqual(re.findall(p, s), [])
+            self.assertEqual(list(re.finditer(p, s)), [])
+            self.assertEqual(re.sub(p, '', s), s)
+        t = time.perf_counter() - start
+        # Patterns anchored with \A or ^ should fail fast, not retry along s:
+        # ~1 s without the optimization, ~0.0003 s with it (author's machine).
+        self.assertLess(t, 0.1)
+
     def test_possessive_quantifiers(self):
         """Test Possessive Quantifiers
         Test quantifiers of the form @+ for some repetition operator @,
diff --git a/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst
new file mode 100644 (file)
index 0000000..5f9c1a1
--- /dev/null
@@ -0,0 +1,3 @@
+Optimize :func:`re.search`, :func:`re.split`, :func:`re.findall`,
+:func:`re.finditer` and :func:`re.sub` for regular expressions starting with
+``\A`` or ``^``.
index 956fd3fad91649f8d776850f4207bfcac62c1197..a82210ff94a0fd3792a7bfee4c1deb4c4d6e5704 100644 (file)
@@ -1693,6 +1693,13 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
         state->start = state->ptr = ptr;
         status = SRE(match)(state, pattern, 1);
         state->must_advance = 0;
+        if (status == 0 && pattern[0] == SRE_OP_AT &&
+            (pattern[1] == SRE_AT_BEGINNING ||
+             pattern[1] == SRE_AT_BEGINNING_STRING))
+        {
+            state->start = state->ptr = ptr = end;
+            return 0;
+        }
         while (status == 0 && ptr < end) {
             ptr++;
             RESET_CAPTURE_GROUP();