]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
authorDennis Sweeney <36520290+sweeneyde@users.noreply.github.com>
Wed, 2 Mar 2022 04:46:30 +0000 (23:46 -0500)
committerGitHub <noreply@github.com>
Wed, 2 Mar 2022 04:46:30 +0000 (23:46 -0500)
Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().

Include/cpython/bytesobject.h
Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst [new file with mode: 0644]
Modules/mmapmodule.c
Objects/bytesobject.c

index 6b3f55224fc553313380a6bcc2c56613ac45a835..38a0fe0af660f882e4ba45bc1c956973e5641e49 100644 (file)
@@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
     void *str,
     const void *bytes,
     Py_ssize_t size);
+
+/* Substring Search.
+
+   Returns the index of the first occurrence of
+   a substring ("needle") in a larger text ("haystack").
+   If the needle is not found, return -1.
+   If the needle is found, add offset to the index.
+*/
+
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+              const char *needle, Py_ssize_t len_needle,
+              Py_ssize_t offset);
+
+/* Same as above, but search right-to-left (finds the last occurrence) */
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+                     const char *needle, Py_ssize_t len_needle,
+                     Py_ssize_t offset);
diff --git a/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst b/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
new file mode 100644 (file)
index 0000000..bd20a84
--- /dev/null
@@ -0,0 +1,3 @@
+For performance, use the optimized string-searching implementations\r
+from :meth:`~bytes.find` and :meth:`~bytes.rfind`\r
+for :meth:`~mmap.find` and :meth:`~mmap.rfind`.
index 26cedf1b9006d8b67f97f7507ddb4998c3f58796..6a038e72f93cfc746051228bae52c50f3bb9c49a 100644 (file)
@@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
     if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
                           &view, &start, &end)) {
         return NULL;
-    } else {
-        const char *p, *start_p, *end_p;
-        int sign = reverse ? -1 : 1;
-        const char *needle = view.buf;
-        Py_ssize_t len = view.len;
-
+    }
+    else {
         if (start < 0)
             start += self->size;
         if (start < 0)
@@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
         else if (end > self->size)
             end = self->size;
 
-        start_p = self->data + start;
-        end_p = self->data + end;
-
-        for (p = (reverse ? end_p - len : start_p);
-             (p >= start_p) && (p + len <= end_p); p += sign) {
-            Py_ssize_t i;
-            for (i = 0; i < len && needle[i] == p[i]; ++i)
-                /* nothing */;
-            if (i == len) {
-                PyBuffer_Release(&view);
-                return PyLong_FromSsize_t(p - self->data);
-            }
+        Py_ssize_t res;
+        if (reverse) {
+            res = _PyBytes_ReverseFind(
+                self->data + start, end - start,
+                view.buf, view.len, start);
+        }
+        else {
+            res = _PyBytes_Find(
+                self->data + start, end - start,
+                view.buf, view.len, start);
         }
         PyBuffer_Release(&view);
-        return PyLong_FromLong(-1);
+        return PyLong_FromSsize_t(res);
     }
 }
 
index 3d8a21696d1c8bbc6eaacc6479e48a4c5c0f2bf2..4c67b8f7af213b25e273d116df1466372cce4e8b 100644 (file)
@@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
 
 #undef STRINGLIB_GET_EMPTY
 
+Py_ssize_t
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+              const char *needle, Py_ssize_t len_needle,
+              Py_ssize_t offset)
+{
+    return stringlib_find(haystack, len_haystack,
+                          needle, len_needle, offset);
+}
+
+Py_ssize_t
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+                     const char *needle, Py_ssize_t len_needle,
+                     Py_ssize_t offset)
+{
+    return stringlib_rfind(haystack, len_haystack,
+                           needle, len_needle, offset);
+}
+
 PyObject *
 PyBytes_Repr(PyObject *obj, int smartquotes)
 {