git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-144157: Optimize bytes.translate() by deferring change detection (GH-144158)
author: Gregory P. Smith <68491+gpshead@users.noreply.github.com>
Thu, 22 Jan 2026 17:21:07 +0000 (09:21 -0800)
committer: GitHub <noreply@github.com>
Thu, 22 Jan 2026 17:21:07 +0000 (09:21 -0800)
Optimize bytes.translate() by deferring change detection

Move the equality check out of the hot loop to allow better compiler
optimization. Instead of checking each byte during translation, perform
a single memcmp at the end to determine if the input can be returned
unchanged.

This allows compilers to unroll and pipeline the loops, resulting in ~2x
throughput improvement for medium-to-large inputs (tested on an AMD Zen 2
CPU). No change was observed on small inputs.

It will also be faster for bytes subclasses: the original object is only
ever returned for exact bytes instances, so subclasses skip change
detection entirely.

Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst [new file with mode: 0644]
Objects/bytesobject.c

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst
new file mode 100644 (file)
index 0000000..ff62d73
--- /dev/null
@@ -0,0 +1,2 @@
+:meth:`bytes.translate` now allows the compiler to unroll its loop more
+usefully for a 2x speedup in the common no-deletions specified case.
index 2b0925017f29e4eb6ebb83dfd1352968cb522163..56de99bde11682a3b623753aaf54d8a0442b8434 100644 (file)
@@ -2237,11 +2237,15 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table,
         /* If no deletions are required, use faster code */
         for (i = inlen; --i >= 0; ) {
             c = Py_CHARMASK(*input++);
-            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
-                changed = 1;
-        }
-        if (!changed && PyBytes_CheckExact(input_obj)) {
-            Py_SETREF(result, Py_NewRef(input_obj));
+            *output++ = table_chars[c];
+        }
+        /* Check if anything changed (for returning original object) */
+        /* We save this check until the end so that the compiler will */
+        /* unroll the loop above leading to MUCH faster code. */
+        if (PyBytes_CheckExact(input_obj)) {
+            if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
+                Py_SETREF(result, Py_NewRef(input_obj));
+            }
         }
         PyBuffer_Release(&del_table_view);
         PyBuffer_Release(&table_view);