]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-127058: Make `PySequence_Tuple` safer and probably faster. (#127758)
authorMark Shannon <mark@hotpy.org>
Wed, 11 Dec 2024 14:02:59 +0000 (14:02 +0000)
committerGitHub <noreply@github.com>
Wed, 11 Dec 2024 14:02:59 +0000 (14:02 +0000)
* Use a small buffer, then list when constructing a tuple from an arbitrary sequence.

Include/internal/pycore_list.h
Lib/test/test_capi/test_tuple.py
Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst [new file with mode: 0644]
Objects/abstract.c
Objects/listobject.c

index f03e484f5ef8b00134a51e75a566c7de4c46365c..836ff30abfcedbd07644224d570f2bc6340abd27 100644 (file)
@@ -62,7 +62,7 @@ typedef struct {
 union _PyStackRef;
 
 PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n);
-
+PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v);
 
 #ifdef __cplusplus
 }
index e6b49caeb51f325c014d93759c05e8dad9c68584..6349467c5d6b708246c2df6f8c120c29fac77964 100644 (file)
@@ -1,5 +1,6 @@
 import unittest
 import sys
+import gc
 from collections import namedtuple
 from test.support import import_helper
 
@@ -257,5 +258,29 @@ class CAPITest(unittest.TestCase):
         self.assertRaises(SystemError, resize, [1, 2, 3], 0, False)
         self.assertRaises(SystemError, resize, NULL, 0, False)
 
+    def test_bug_59313(self):
+        # Before 3.14, the C-API function PySequence_Tuple
+        # would create incomplete tuples which were visible to
+        # the cycle GC, and this test would crash the interpeter.
+        TAG = object()
+        tuples = []
+
+        def referrer_tuples():
+            return [x for x in gc.get_referrers(TAG)
+                if isinstance(x, tuple)]
+
+        def my_iter():
+            nonlocal tuples
+            yield TAG    # 'tag' gets stored in the result tuple
+            tuples += referrer_tuples()
+            for x in range(10):
+                tuples += referrer_tuples()
+                # Prior to 3.13 would raise a SystemError when the tuple needs to be resized
+                yield x
+
+        self.assertEqual(tuple(my_iter()), (TAG, *range(10)))
+        self.assertEqual(tuples, [])
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst
new file mode 100644 (file)
index 0000000..248e1b4
--- /dev/null
@@ -0,0 +1,3 @@
+``PySequence_Tuple`` now creates the resulting tuple atomically, preventing
+partially created tuples being visible to the garbage collector or through
+``gc.get_referrers()``
index f6647874d732f6cc4cded3bbb1e4412e048266f9..c92ef10aa796487b3cec26d811e27d8fb8f8d257 100644 (file)
@@ -1993,9 +1993,6 @@ PyObject *
 PySequence_Tuple(PyObject *v)
 {
     PyObject *it;  /* iter(v) */
-    Py_ssize_t n;             /* guess for result tuple size */
-    PyObject *result = NULL;
-    Py_ssize_t j;
 
     if (v == NULL) {
         return null_error();
@@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v)
     if (it == NULL)
         return NULL;
 
-    /* Guess result size and allocate space. */
-    n = PyObject_LengthHint(v, 10);
-    if (n == -1)
-        goto Fail;
-    result = PyTuple_New(n);
-    if (result == NULL)
-        goto Fail;
-
-    /* Fill the tuple. */
-    for (j = 0; ; ++j) {
+    Py_ssize_t n;
+    PyObject *buffer[8];
+    for (n = 0; n < 8; n++) {
         PyObject *item = PyIter_Next(it);
         if (item == NULL) {
-            if (PyErr_Occurred())
-                goto Fail;
-            break;
-        }
-        if (j >= n) {
-            size_t newn = (size_t)n;
-            /* The over-allocation strategy can grow a bit faster
-               than for lists because unlike lists the
-               over-allocation isn't permanent -- we reclaim
-               the excess before the end of this routine.
-               So, grow by ten and then add 25%.
-            */
-            newn += 10u;
-            newn += newn >> 2;
-            if (newn > PY_SSIZE_T_MAX) {
-                /* Check for overflow */
-                PyErr_NoMemory();
-                Py_DECREF(item);
-                goto Fail;
+            if (PyErr_Occurred()) {
+                goto fail;
             }
-            n = (Py_ssize_t)newn;
-            if (_PyTuple_Resize(&result, n) != 0) {
-                Py_DECREF(item);
-                goto Fail;
+            Py_DECREF(it);
+            return _PyTuple_FromArraySteal(buffer, n);
+        }
+        buffer[n] = item;
+    }
+    PyListObject *list = (PyListObject *)PyList_New(16);
+    if (list == NULL) {
+        goto fail;
+    }
+    assert(n == 8);
+    Py_SET_SIZE(list, n);
+    for (Py_ssize_t j = 0; j < n; j++) {
+        PyList_SET_ITEM(list, j, buffer[j]);
+    }
+    for (;;) {
+        PyObject *item = PyIter_Next(it);
+        if (item == NULL) {
+            if (PyErr_Occurred()) {
+                Py_DECREF(list);
+                Py_DECREF(it);
+                return NULL;
             }
+            break;
+        }
+        if (_PyList_AppendTakeRef(list, item) < 0) {
+            Py_DECREF(list);
+            Py_DECREF(it);
+            return NULL;
         }
-        PyTuple_SET_ITEM(result, j, item);
     }
-
-    /* Cut tuple back if guess was too large. */
-    if (j < n &&
-        _PyTuple_Resize(&result, j) != 0)
-        goto Fail;
-
     Py_DECREF(it);
-    return result;
-
-Fail:
-    Py_XDECREF(result);
+    PyObject *res = _PyList_AsTupleAndClear(list);
+    Py_DECREF(list);
+    return res;
+fail:
     Py_DECREF(it);
+    while (n > 0) {
+        n--;
+        Py_DECREF(buffer[n]);
+    }
     return NULL;
 }
 
index 3832295600a0ab006cc026fdf93ed6a5f1ecb786..a877bad66be45f3d6f653b5a9fefef64335691f3 100644 (file)
@@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v)
     return ret;
 }
 
+PyObject *
+_PyList_AsTupleAndClear(PyListObject *self)
+{
+    assert(self != NULL);
+    PyObject *ret;
+    if (self->ob_item == NULL) {
+        return PyTuple_New(0);
+    }
+    Py_BEGIN_CRITICAL_SECTION(self);
+    PyObject **items = self->ob_item;
+    Py_ssize_t size = Py_SIZE(self);
+    self->ob_item = NULL;
+    Py_SET_SIZE(self, 0);
+    ret = _PyTuple_FromArraySteal(items, size);
+    free_list_items(items, false);
+    Py_END_CRITICAL_SECTION();
+    return ret;
+}
+
 PyObject *
 _PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n)
 {