From: Serhiy Storchaka Date: Tue, 22 Mar 2022 15:27:55 +0000 (+0200) Subject: bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH... X-Git-Tag: v3.11.0a7~162 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=492d4109f4d953c478cb48f17aa32adbb912623b;p=thirdparty%2FPython%2Fcpython.git bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH-32021) Affected functions are re.search(), re.split(), re.findall(), re.finditer() and re.sub(). --- diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index da827ca7c4e9..fd6db6a300d0 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -5,6 +5,7 @@ import locale import re import sre_compile import string +import time import unittest import warnings from re import Scanner @@ -2038,6 +2039,20 @@ class ReTests(unittest.TestCase): with self.assertRaisesRegex(TypeError, "got 'type'"): re.search("x*", type) + def test_search_anchor_at_beginning(self): + s = 'x'*10**7 + start = time.perf_counter() + for p in r'\Ay', r'^y': + self.assertIsNone(re.search(p, s)) + self.assertEqual(re.split(p, s), [s]) + self.assertEqual(re.findall(p, s), []) + self.assertEqual(list(re.finditer(p, s)), []) + self.assertEqual(re.sub(p, '', s), s) + t = time.perf_counter() - start + # Without optimization it takes 1 second on my computer. + # With optimization -- 0.0003 seconds. + self.assertLess(t, 0.1) + def test_possessive_quantifiers(self): """Test Possessive Quantifiers Test quantifiers of the form @+ for some repetition operator @, diff --git a/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst new file mode 100644 index 000000000000..5f9c1a19de22 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-21-08-32-19.bpo-42885.LCnTTp.rst @@ -0,0 +1,3 @@ +Optimize :func:`re.search`, :func:`re.split`, :func:`re.findall`, +:func:`re.finditer` and :func:`re.sub` for regular expressions starting with +``\A`` or ``^``. diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 956fd3fad916..a82210ff94a0 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -1693,6 +1693,13 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) state->start = state->ptr = ptr; status = SRE(match)(state, pattern, 1); state->must_advance = 0; + if (status == 0 && pattern[0] == SRE_OP_AT && + (pattern[1] == SRE_AT_BEGINNING || + pattern[1] == SRE_AT_BEGINNING_STRING)) + { + state->start = state->ptr = ptr = end; + return 0; + } while (status == 0 && ptr < end) { ptr++; RESET_CAPTURE_GROUP();