git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-139871: Optimize small takes in bytearray.take_bytes (GH-141741)
author Cody Maloney <cmaloney@users.noreply.github.com>
Thu, 20 Nov 2025 07:49:05 +0000 (23:49 -0800)
committer GitHub <noreply@github.com>
Thu, 20 Nov 2025 07:49:05 +0000 (08:49 +0100)
When less than half of the buffer is taken, just copy that small part out
rather than doing a big alloc + memmove + big shrink.

Lib/test/test_bytes.py
Objects/bytearrayobject.c

index 86898bfcab91359aeafe1ab3319340ff61825d19..7ca38bb8c8421e5be8e7fd14a5a3f608cc79c28c 100644 (file)
@@ -1524,6 +1524,32 @@ class ByteArrayTest(BaseBytesTest, unittest.TestCase):
             self.assertRaises(BufferError, ba.take_bytes)
         self.assertEqual(ba.take_bytes(), b'abc')
 
+    @support.cpython_only  # tests an implementation detail
+    def test_take_bytes_optimization(self):
+        # Validate optimization around taking lots of little chunks out of a
+        # much bigger buffer. Save work by only copying a little rather than
+        # moving a lot.
+        ba = bytearray(b'abcdef' + b'0' * 1000)
+        start_alloc = ba.__alloc__()
+
+        # Take two bytes at a time, checking alloc doesn't change.
+        self.assertEqual(ba.take_bytes(2), b'ab')
+        self.assertEqual(ba.__alloc__(), start_alloc)
+        self.assertEqual(len(ba), 4 + 1000)
+        self.assertEqual(ba.take_bytes(2), b'cd')
+        self.assertEqual(ba.__alloc__(), start_alloc)
+        self.assertEqual(len(ba), 2 + 1000)
+        self.assertEqual(ba.take_bytes(2), b'ef')
+        self.assertEqual(ba.__alloc__(), start_alloc)
+        self.assertEqual(len(ba), 0 + 1000)
+        self.assertEqual(ba.__alloc__(), start_alloc)
+
+        # Take over half, alloc shrinks to exact size.
+        self.assertEqual(ba.take_bytes(501), b'0' * 501)
+        self.assertEqual(len(ba), 499)
+        bytes_header_size = sys.getsizeof(b'')
+        self.assertEqual(ba.__alloc__(), 499 + bytes_header_size)
+
     def test_setitem(self):
         def setitem_as_mapping(b, i, val):
             b[i] = val
index 99bfdec89f6c3a6a33500d289491de8c7aa2ffc1..99e1c9b13f7879acdc0ff31a0430bd23f368beed 100644 (file)
@@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
         return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
     }
 
-    // Copy remaining bytes to a new bytes.
     Py_ssize_t remaining_length = size - to_take;
+    // optimization: If taking less than leaving, just copy the small to_take
+    // portion out and move ob_start.
+    if (to_take < remaining_length) {
+        PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take);
+        if (ret == NULL) {
+            return NULL;
+        }
+        self->ob_start += to_take;
+        Py_SET_SIZE(self, remaining_length);
+        return ret;
+    }
+
+    // Copy remaining bytes to a new bytes.
     PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
                                                     remaining_length);
     if (remaining == NULL) {