]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-33234 Improve list() pre-sizing for inputs with known lengths (GH-9846)
authorPablo Galindo <Pablogsal@gmail.com>
Sun, 28 Oct 2018 20:16:26 +0000 (20:16 +0000)
committerGitHub <noreply@github.com>
Sun, 28 Oct 2018 20:16:26 +0000 (20:16 +0000)
The list() constructor isn't taking full advantage of known input
lengths or length hints. This commit makes the constructor
pre-size and not over-allocate when the input size is known (the
input collection implements __len__). One of the main advantages is
that this provides a 12% difference in memory savings due to the difference
between overallocating and allocating exactly the input size.

For efficiency purposes and to avoid a performance regression for small
generators and collections, the size of the input object is calculated using
__len__ and not __length_hint__, as the latter is considerably slower.

Lib/test/test_list.py
Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst [new file with mode: 0644]
Objects/listobject.c

index def4badbf5578e85b0facd46bc516e3219f34c1c..c5002b12732c9876829da8cc7a4e42271578454e 100644 (file)
@@ -1,5 +1,6 @@
 import sys
 from test import list_tests
+from test.support import cpython_only
 import pickle
 import unittest
 
@@ -157,5 +158,13 @@ class ListTest(list_tests.CommonTest):
         with self.assertRaises(TypeError):
             (3,) + L([1,2])
 
+    @cpython_only
+    def test_preallocation(self):  # list() built from a sized input should match the baseline size
+        iterable = [0] * 10
+        iter_size = sys.getsizeof(iterable)  # baseline; presumes [0] * 10 carries no spare capacity
+
+        self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))  # sized input: a list
+        self.assertEqual(iter_size, sys.getsizeof(list(range(10))))  # sized input: a range
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst
new file mode 100644 (file)
index 0000000..2f9cd62
--- /dev/null
@@ -0,0 +1,2 @@
+The list constructor will pre-size and not over-allocate when
+the input length is known.
index fa26444f847fc47a050259afe222dda4beedc372..e85fa5c526355e780b425f988689b9bce8247371 100644 (file)
@@ -76,6 +76,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
     return 0;
 }
 
+static int  /* 0 on success; -1 after PyErr_NoMemory() on failure */
+list_preallocate_exact(PyListObject *self, Py_ssize_t size)
+{  /* Give a list that has no item buffer one with exactly `size` slots. */
+    assert(self->ob_item == NULL);  /* precondition: no buffer allocated yet */
+
+    PyObject **items;
+    size_t allocated;
+
+    allocated = (size_t)size;
+    if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
+        PyErr_NoMemory();  /* allocated * sizeof(PyObject *) would exceed PY_SSIZE_T_MAX */
+        return -1;
+    }
+
+    if (size == 0) {
+        allocated = 0;  /* no-op: allocated was already set to (size_t)size, i.e. 0 */
+    }
+    items = (PyObject **)PyMem_New(PyObject*, allocated);  /* room for `allocated` pointers, no extra */
+    if (items == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    self->ob_item = items;
+    self->allocated = allocated;  /* records capacity only; the list's length is not touched here */
+    return 0;
+}
+
 /* Debug statistic to compare allocations with reuse through the free list */
 #undef SHOW_ALLOC_COUNT
 #ifdef SHOW_ALLOC_COUNT
@@ -2683,6 +2710,19 @@ list___init___impl(PyListObject *self, PyObject *iterable)
         (void)_list_clear(self);
     }
     if (iterable != NULL) {
+        if (_PyObject_HasLen(iterable)) {
+            Py_ssize_t iter_len = PyObject_Size(iterable);
+            if (iter_len == -1) {
+                if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
+                    return -1;
+                }
+                PyErr_Clear();
+            }
+            if (iter_len > 0 && self->ob_item == NULL
+                && list_preallocate_exact(self, iter_len)) {
+                return -1;
+            }
+        }
         PyObject *rv = list_extend(self, iterable);
         if (rv == NULL)
             return -1;