git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-124470: Fix crash when reading from object instance dictionary while replacing...
author Dino Viehland <dinoviehland@meta.com>
Thu, 21 Nov 2024 16:41:19 +0000 (08:41 -0800)
committer GitHub <noreply@github.com>
Thu, 21 Nov 2024 16:41:19 +0000 (10:41 -0600)
Delay-free a dictionary when replacing it

Include/internal/pycore_gc.h
Include/internal/pycore_pymem.h
Lib/test/test_free_threading/test_dict.py
Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst [new file with mode: 0644]
Objects/dictobject.c
Objects/obmalloc.c
Python/gc_free_threading.c
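At a high level, the crash is a use-after-free: on a free-threaded build, attribute reads load the object's dictionary pointer without holding a lock, so a writer that swaps in a new dictionary and frees the old one immediately can pull memory out from under an in-flight reader. The fix is to delay the free (and the final decref) until no reader can still hold the stale pointer. A miniature model of the race, with illustrative names rather than CPython's:

    #include <stdatomic.h>
    #include <stdlib.h>

    typedef struct { int value; } fake_dict;
    static _Atomic(fake_dict *) shared_dict;

    static int reader(void)
    {
        /* Lock-free read, as done on the attribute-lookup fast path. */
        fake_dict *d = atomic_load_explicit(&shared_dict, memory_order_acquire);
        return d->value;   /* use-after-free if the writer already freed d */
    }

    static void writer(fake_dict *new_dict)
    {
        fake_dict *old = atomic_exchange_explicit(&shared_dict, new_dict,
                                                  memory_order_acq_rel);
        /* BUG: freeing immediately races with in-flight readers.
         * FIX (this commit): enqueue `old` on a delayed-free queue and
         * reclaim it only after all readers have quiesced. */
        free(old);
    }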

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 38a1c56c09d9db209b07ca055f2df653428b30ad..479fe10d00066d7f560a66470ed37d81a49df53a 100644 (file)
@@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
 #  define _PyGC_BITS_UNREACHABLE    (4)
 #  define _PyGC_BITS_FROZEN         (8)
 #  define _PyGC_BITS_SHARED         (16)
-#  define _PyGC_BITS_SHARED_INLINE  (32)
 #  define _PyGC_BITS_DEFERRED       (64)    // Use deferred reference counting
 #endif
 
@@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
 }
 #define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op))
 
-/* True if the memory of the object is shared between multiple
- * threads and needs special purpose when freeing due to
- * the possibility of in-flight lock-free reads occurring.
- * Objects with this bit that are GC objects will automatically
- * delay-freed by PyObject_GC_Del. */
-static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
-    return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
-}
-#define _PyObject_GC_IS_SHARED_INLINE(op) \
-    _PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op))
-
-static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
-    _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
-}
-#define _PyObject_GC_SET_SHARED_INLINE(op) \
-    _PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op))
-
 #endif
 
 /* Bit flags for _gc_prev */
diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index dd6b0762370c92602a7e128f44b3d459cc6f70d3..5bb34001aab1b460e5f3a0aae88b9e43fe362160 100644 (file)
@@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void);
 extern void _PyMem_FreeDelayed(void *ptr);
 
 // Enqueue an object to be freed possibly after some delay
-extern void _PyObject_FreeDelayed(void *ptr);
+#ifdef Py_GIL_DISABLED
+extern void _PyObject_XDecRefDelayed(PyObject *obj);
+#else
+static inline void _PyObject_XDecRefDelayed(PyObject *obj)
+{
+    Py_XDECREF(obj);
+}
+#endif
 
 // Periodically process delayed free requests.
 extern void _PyMem_ProcessDelayed(PyThreadState *tstate);
 
+
+// Periodically process delayed free requests when the world is stopped.
+// Notify of any objects which should be freed.
+typedef void (*delayed_dealloc_cb)(PyObject *, void *);
+extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate,
+                                           delayed_dealloc_cb cb, void *state);
+
 // Abandon all thread-local delayed free requests and push them to the
 // interpreter's queue.
 extern void _PyMem_AbandonDelayed(PyThreadState *tstate);
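The new _PyMem_ProcessDelayedNoDealloc entry point exists so a caller that has stopped the world can drain the delayed queues without running arbitrary destructors: object entries whose merged refcount reaches zero are handed to the callback instead of being deallocated. A sketch of a conforming callback, assuming an illustrative `pending_list` structure that is not part of CPython:

    typedef struct {
        PyObject *objs[256];
        Py_ssize_t count;
    } pending_list;   /* illustrative; the GC uses a growable worklist */

    static void
    record_dead_object(PyObject *op, void *state)
    {
        /* Invoked for each queued object whose merged refcount hit zero.
         * Deallocating here would be unsafe while the world is stopped,
         * so only record the object for later handling. */
        pending_list *pending = (pending_list *)state;
        if (pending->count < 256) {
            pending->objs[pending->count++] = op;
        }
    }

    /* While the world is stopped:
     *     _PyMem_ProcessDelayedNoDealloc(tstate, record_dead_object, &pending);
     * After the world restarts, the recorded objects can be decref'd and
     * deallocated normally (see queue_untracked_obj_decref below). */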
diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py
index 80daf0d9cae9e0f1ecd45d3831a329dd95ef86a5..13717cb39fa35d277ce2a053ec71a6b166b8ecff 100644 (file)
@@ -142,6 +142,69 @@ class TestDict(TestCase):
             for ref in thread_list:
                 self.assertIsNone(ref())
 
+    def test_racing_set_object_dict(self):
+        """Races assigning to __dict__ should be thread safe"""
+        class C: pass
+        class MyDict(dict): pass
+        for cyclic in (False, True):
+            f = C()
+            f.__dict__ = {"foo": 42}
+            THREAD_COUNT = 10
+
+            def writer_func(l):
+                for i in range(1000):
+                    if cyclic:
+                        other_d = {}
+                    d = MyDict({"foo": 100})
+                    if cyclic:
+                        d["x"] = other_d
+                        other_d["bar"] = d
+                    l.append(weakref.ref(d))
+                    f.__dict__ = d
+
+            def reader_func():
+                for i in range(1000):
+                    f.foo
+
+            lists = []
+            readers = []
+            writers = []
+            for x in range(THREAD_COUNT):
+                thread_list = []
+                lists.append(thread_list)
+                writer = Thread(target=partial(writer_func, thread_list))
+                writers.append(writer)
+
+            for x in range(THREAD_COUNT):
+                reader = Thread(target=partial(reader_func))
+                readers.append(reader)
+
+            for writer in writers:
+                writer.start()
+            for reader in readers:
+                reader.start()
+
+            for writer in writers:
+                writer.join()
+
+            for reader in readers:
+                reader.join()
+
+            f.__dict__ = {}
+            gc.collect()
+            gc.collect()
+
+            count = 0
+            ids = set()
+            for thread_list in lists:
+                for i, ref in enumerate(thread_list):
+                    if ref() is None:
+                        continue
+                    count += 1
+                    ids.add(id(ref()))
+
+            self.assertEqual(count, 0)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst
new file mode 100644 (file)
index 0000000..8f2f371
--- /dev/null
@@ -0,0 +1 @@
+Fix crash in free-threaded builds when replacing an object's dictionary while reading an attribute on another thread.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 23616b3918b0d6ca51c94954925e89f8a66a2d95..393e9f913908095714125b6efbeea31200b0e8d2 100644 (file)
@@ -7087,51 +7087,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict)
     }
 }
 
-int
-_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
+#ifdef Py_GIL_DISABLED
+
+// Tries to set the dictionary for an object in the easy case: when the
+// current dictionary either is not materialized at all or is a dictionary
+// which does not point at the inline values.
+static bool
+try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict)
+{
+    bool replaced = false;
+    Py_BEGIN_CRITICAL_SECTION(obj);
+
+    PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj);
+    if (dict == NULL) {
+        // We only have inline values, we can just completely replace them.
+        set_dict_inline_values(obj, (PyDictObject *)new_dict);
+        replaced = true;
+        goto exit_lock;
+    }
+
+    if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) {
+        // We have a materialized dict which doesn't point at the inline values,
+        // so we can simply swap dictionaries and free the old one.
+        FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
+                            (PyDictObject *)Py_XNewRef(new_dict));
+        replaced = true;
+        goto exit_lock;
+    }
+    else {
+        // We have inline values, we need to lock the dict and the object
+        // at the same time to safely dematerialize them. To do that while releasing
+        // the object lock we need a strong reference to the current dictionary.
+        Py_INCREF(dict);
+    }
+exit_lock:
+    Py_END_CRITICAL_SECTION();
+    return replaced;
+}
+
+// Replaces a dictionary that is probably the one materialized from the
+// inline values. Another thread could have raced and already replaced it
+// with a different dictionary, though.
+static int
+replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict,
+                                          PyDictObject *cur_dict, PyObject *new_dict)
+{
+    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);
+
+    if (cur_dict == inline_dict) {
+        assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj));
+
+        int err = _PyDict_DetachFromObject(inline_dict, obj);
+        if (err != 0) {
+            assert(new_dict == NULL);
+            return err;
+        }
+    }
+
+    FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
+                        (PyDictObject *)Py_XNewRef(new_dict));
+    return 0;
+}
+
+#endif
+
+static void
+decref_maybe_delay(PyObject *obj, bool delay)
+{
+    if (delay) {
+        _PyObject_XDecRefDelayed(obj);
+    }
+    else {
+        Py_XDECREF(obj);
+    }
+}
+
+static int
+set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear)
 {
     assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+#ifndef NDEBUG
+    Py_BEGIN_CRITICAL_SECTION(obj);
     assert(_PyObject_InlineValuesConsistencyCheck(obj));
+    Py_END_CRITICAL_SECTION();
+#endif
     int err = 0;
     PyTypeObject *tp = Py_TYPE(obj);
     if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
-        PyDictObject *dict = _PyObject_GetManagedDict(obj);
-        if (dict == NULL) {
 #ifdef Py_GIL_DISABLED
-            Py_BEGIN_CRITICAL_SECTION(obj);
+        PyDictObject *prev_dict;
+        if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) {
+            // We had a materialized dictionary which pointed at the inline
+            // values. We need to lock both the object and the dict at the
+            // same time to safely replace it. We can't merely lock the dictionary
+            // while the object is locked because it could suspend the object lock.
+            PyDictObject *cur_dict;
 
-            dict = _PyObject_ManagedDictPointer(obj)->dict;
-            if (dict == NULL) {
-                set_dict_inline_values(obj, (PyDictObject *)new_dict);
-            }
+            assert(prev_dict != NULL);
+            Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict);
 
-            Py_END_CRITICAL_SECTION();
+            // Another thread could have raced with us between the point where
+            // try_set_dict_inline_only_or_other_dict released the object lock
+            // and the point where we re-acquired it along with the dict lock.
+            cur_dict = _PyObject_GetManagedDict(obj);
 
-            if (dict == NULL) {
-                return 0;
+            err = replace_dict_probably_inline_materialized(obj, prev_dict,
+                                                            cur_dict, new_dict);
+
+            Py_END_CRITICAL_SECTION2();
+
+            // Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict
+            // while the object was locked
+            decref_maybe_delay((PyObject *)prev_dict,
+                               !clear && prev_dict != cur_dict);
+            if (err != 0) {
+                return err;
             }
-#else
-            set_dict_inline_values(obj, (PyDictObject *)new_dict);
-            return 0;
-#endif
-        }
 
-        Py_BEGIN_CRITICAL_SECTION2(dict, obj);
+            prev_dict = cur_dict;
+        }
 
-        // We've locked dict, but the actual dict could have changed
-        // since we locked it.
-        dict = _PyObject_ManagedDictPointer(obj)->dict;
-        err = _PyDict_DetachFromObject(dict, obj);
-        assert(err == 0 || new_dict == NULL);
-        if (err == 0) {
-            FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
-                                (PyDictObject *)Py_XNewRef(new_dict));
+        if (prev_dict != NULL) {
+            // decref for the dictionary that we replaced
+            decref_maybe_delay((PyObject *)prev_dict, !clear);
         }
-        Py_END_CRITICAL_SECTION2();
 
-        if (err == 0) {
-            Py_XDECREF(dict);
+        return 0;
+#else
+        PyDictObject *dict = _PyObject_GetManagedDict(obj);
+        if (dict == NULL) {
+            set_dict_inline_values(obj, (PyDictObject *)new_dict);
+            return 0;
+        }
+        if (_PyDict_DetachFromObject(dict, obj) == 0) {
+            _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict);
+            Py_DECREF(dict);
+            return 0;
         }
+        assert(new_dict == NULL);
+        return -1;
+#endif
     }
     else {
         PyDictObject *dict;
@@ -7144,17 +7239,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
                             (PyDictObject *)Py_XNewRef(new_dict));
 
         Py_END_CRITICAL_SECTION();
-
-        Py_XDECREF(dict);
+        decref_maybe_delay((PyObject *)dict, !clear);
     }
     assert(_PyObject_InlineValuesConsistencyCheck(obj));
     return err;
 }
 
+int
+_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
+{
+    return set_or_clear_managed_dict(obj, new_dict, false);
+}
+
 void
 PyObject_ClearManagedDict(PyObject *obj)
 {
-    if (_PyObject_SetManagedDict(obj, NULL) < 0) {
+    if (set_or_clear_managed_dict(obj, NULL, true) < 0) {
         /* Must be out of memory */
         assert(PyErr_Occurred() == PyExc_MemoryError);
         PyErr_WriteUnraisable(NULL);
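The rewritten set path follows a lock-then-revalidate pattern: while holding only the object lock we take a strong reference to the materialized dictionary, drop the object lock, re-acquire both locks together with Py_BEGIN_CRITICAL_SECTION2, and then check whether another thread swapped the dictionary in the window where we held no locks. A standalone sketch of that pattern using plain pthreads; every name here is illustrative, and the mismatch/retry handling is simplified away:

    #include <pthread.h>
    #include <stddef.h>

    typedef struct dict { pthread_mutex_t mu; int refs; } dict_t;
    typedef struct obj  { pthread_mutex_t mu; dict_t *dict; } obj_t;

    /* Assume refcount updates are atomic in a real implementation. */
    static dict_t *dict_incref(dict_t *d) { d->refs++; return d; }
    static void    dict_decref(dict_t *d) { d->refs--; }

    static void
    replace_dict(obj_t *o, dict_t *new_dict)
    {
        /* Step 1: under the object lock, take a strong reference to the
         * current dict so it stays alive after we unlock. */
        pthread_mutex_lock(&o->mu);
        dict_t *prev = o->dict ? dict_incref(o->dict) : NULL;
        pthread_mutex_unlock(&o->mu);

        /* Step 2: lock both (CPython: Py_BEGIN_CRITICAL_SECTION2), then
         * revalidate; another thread may have replaced the dict already. */
        pthread_mutex_lock(&o->mu);
        if (prev != NULL) pthread_mutex_lock(&prev->mu);
        if (o->dict == prev) {
            o->dict = new_dict;        /* safe: both locks held */
        }
        if (prev != NULL) pthread_mutex_unlock(&prev->mu);
        pthread_mutex_unlock(&o->mu);

        if (prev != NULL) dict_decref(prev);  /* drop the temporary reference */
    }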
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index dfeccfa4dd76586a0be4f14fd1d25695580dc515..3d6b1ab1840e73cab37178b898f6d585995aa488 100644 (file)
@@ -1093,10 +1093,24 @@ struct _mem_work_chunk {
 };
 
 static void
-free_work_item(uintptr_t ptr)
+free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
 {
     if (ptr & 0x01) {
-        PyObject_Free((char *)(ptr - 1));
+        PyObject *obj = (PyObject *)(ptr - 1);
+#ifdef Py_GIL_DISABLED
+        if (cb == NULL) {
+            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
+            Py_DECREF(obj);
+            return;
+        }
+
+        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
+        if (refcount == 0) {
+            cb(obj, state);
+        }
+#else
+        Py_DECREF(obj);
+#endif
     }
     else {
         PyMem_Free((void *)ptr);
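free_work_item depends on a low-bit pointer tag: queue entries are either raw memory blocks (freed with PyMem_Free) or PyObject pointers (decref'd), and since allocations are at least two-byte aligned, bit 0 is free to record which kind each entry is. A self-contained sketch of the scheme, with illustrative names:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uintptr_t tag_object(void *obj) { return (uintptr_t)obj | 0x01; }
    static uintptr_t tag_memory(void *mem) { return (uintptr_t)mem; }

    static void
    handle_work_item(uintptr_t ptr)
    {
        if (ptr & 0x01) {
            void *obj = (void *)(ptr - 1);       /* strip the tag bit */
            printf("object %p: decref\n", obj);  /* Py_DECREF in the real code */
        }
        else {
            printf("memory %p: free\n", (void *)ptr);
            free((void *)ptr);
        }
    }

    int main(void)
    {
        void *mem = malloc(16);
        static int fake_object;                  /* stands in for a PyObject */
        handle_work_item(tag_memory(mem));
        handle_work_item(tag_object(&fake_object));
        return 0;
    }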
@@ -1107,7 +1121,7 @@ static void
 free_delayed(uintptr_t ptr)
 {
 #ifndef Py_GIL_DISABLED
-    free_work_item(ptr);
+    free_work_item(ptr, NULL, NULL);
 #else
     PyInterpreterState *interp = _PyInterpreterState_GET();
     if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
@@ -1115,7 +1129,8 @@ free_delayed(uintptr_t ptr)
     {
         // Free immediately during interpreter shutdown or if the world is
         // stopped.
-        free_work_item(ptr);
+        assert(!interp->stoptheworld.world_stopped || !(ptr & 0x01));
+        free_work_item(ptr, NULL, NULL);
         return;
     }
 
@@ -1142,7 +1157,8 @@ free_delayed(uintptr_t ptr)
     if (buf == NULL) {
         // failed to allocate a buffer, free immediately
         _PyEval_StopTheWorld(tstate->base.interp);
-        free_work_item(ptr);
+        // TODO: Fix me
+        free_work_item(ptr, NULL, NULL);
         _PyEval_StartTheWorld(tstate->base.interp);
         return;
     }
@@ -1166,12 +1182,16 @@ _PyMem_FreeDelayed(void *ptr)
     free_delayed((uintptr_t)ptr);
 }
 
+#ifdef Py_GIL_DISABLED
 void
-_PyObject_FreeDelayed(void *ptr)
+_PyObject_XDecRefDelayed(PyObject *ptr)
 {
     assert(!((uintptr_t)ptr & 0x01));
-    free_delayed(((uintptr_t)ptr)|0x01);
+    if (ptr != NULL) {
+        free_delayed(((uintptr_t)ptr)|0x01);
+    }
 }
+#endif
 
 static struct _mem_work_chunk *
 work_queue_first(struct llist_node *head)
@@ -1181,7 +1201,7 @@ work_queue_first(struct llist_node *head)
 
 static void
 process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
-              bool keep_empty)
+              bool keep_empty, delayed_dealloc_cb cb, void *state)
 {
     while (!llist_empty(head)) {
         struct _mem_work_chunk *buf = work_queue_first(head);
@@ -1192,7 +1212,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
                 return;
             }
 
-            free_work_item(item->ptr);
+            free_work_item(item->ptr, cb, state);
             buf->rd_idx++;
         }
 
@@ -1210,7 +1230,8 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
 
 static void
 process_interp_queue(struct _Py_mem_interp_free_queue *queue,
-                     struct _qsbr_thread_state *qsbr)
+                     struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb,
+                     void *state)
 {
     if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
         return;
@@ -1218,7 +1239,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue,
 
     // Try to acquire the lock, but don't block if it's already held.
     if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
-        process_queue(&queue->head, qsbr, false);
+        process_queue(&queue->head, qsbr, false, cb, state);
 
         int more_work = !llist_empty(&queue->head);
         _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
@@ -1234,10 +1255,23 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
     _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
 
     // Process thread-local work
-    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
+    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL);
+
+    // Process shared interpreter work
+    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL);
+}
+
+void
+_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
+{
+    PyInterpreterState *interp = tstate->interp;
+    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+
+    // Process thread-local work
+    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state);
 
     // Process shared interpreter work
-    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
+    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state);
 }
 
 void
@@ -1279,7 +1313,7 @@ _PyMem_FiniDelayed(PyInterpreterState *interp)
             // Free the remaining items immediately. There should be no other
             // threads accessing the memory at this point during shutdown.
             struct _mem_work_item *item = &buf->array[buf->rd_idx];
-            free_work_item(item->ptr);
+            free_work_item(item->ptr, NULL, NULL);
             buf->rd_idx++;
         }
 
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index a6e0022340b7fe817922b0e0453a19eefb07be39..0920c616c3c6b0d3ae4b0e65980752826e250791 100644 (file)
@@ -393,6 +393,23 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
     HEAD_UNLOCK(&_PyRuntime);
 }
 
+static void
+queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
+{
+    if (!_PyObject_GC_IS_TRACKED(op)) {
+        // GC objects with zero refcount are handled subsequently by the
+        // GC as if they were cyclic trash, but we have to handle dead
+        // non-GC objects here. Add one to the refcount so that we can
+        // decref and deallocate the object once we start the world again.
+        op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
+#ifdef Py_REF_DEBUG
+        _Py_IncRefTotal(_PyThreadState_GET());
+#endif
+        worklist_push(&state->objs_to_decref, op);
+    }
+
+}
+
 static void
 merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
 {
@@ -404,22 +421,20 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
         // Subtract one when merging because the queue had a reference.
         Py_ssize_t refcount = merge_refcount(op, -1);
 
-        if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) {
-            // GC objects with zero refcount are handled subsequently by the
-            // GC as if they were cyclic trash, but we have to handle dead
-            // non-GC objects here. Add one to the refcount so that we can
-            // decref and deallocate the object once we start the world again.
-            op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
-#ifdef Py_REF_DEBUG
-            _Py_IncRefTotal(_PyThreadState_GET());
-#endif
-            worklist_push(&state->objs_to_decref, op);
+        if (refcount == 0) {
+            queue_untracked_obj_decref(op, state);
         }
     }
 }
 
 static void
-process_delayed_frees(PyInterpreterState *interp)
+queue_freed_object(PyObject *obj, void *arg)
+{
+    queue_untracked_obj_decref(obj, arg);
+}
+
+static void
+process_delayed_frees(PyInterpreterState *interp, struct collection_state *state)
 {
     // While we are in a "stop the world" pause, we can observe the latest
     // write sequence by advancing the write sequence immediately.
@@ -438,7 +453,7 @@ process_delayed_frees(PyInterpreterState *interp)
     }
     HEAD_UNLOCK(&_PyRuntime);
 
-    _PyMem_ProcessDelayed((PyThreadState *)current_tstate);
+    _PyMem_ProcessDelayedNoDealloc((PyThreadState *)current_tstate, queue_freed_object, state);
 }
 
 // Subtract an incoming reference from the computed "gc_refs" refcount.
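queue_untracked_obj_decref captures the deferral trick for dead non-GC objects found during a stop-the-world pause: running a destructor immediately would be unsafe, so the object is given one artificial reference and pushed onto a worklist; once the world restarts, an ordinary decref takes it back to zero and deallocates it through the normal path. A toy model of the same idea, with illustrative names:

    #include <stdio.h>

    typedef struct { const char *name; int refcount; } toy_obj;

    static toy_obj *worklist[16];
    static int worklist_len = 0;

    static void toy_dealloc(toy_obj *op) { printf("dealloc %s\n", op->name); }

    static void
    toy_decref(toy_obj *op)
    {
        if (--op->refcount == 0) {
            toy_dealloc(op);          /* may run arbitrary destructor code */
        }
    }

    /* World stopped: refcount already reached zero, but destructors must
     * not run yet, so resurrect with +1 and queue for later. */
    static void
    queue_dead_object(toy_obj *op)
    {
        op->refcount += 1;
        worklist[worklist_len++] = op;
    }

    int main(void)
    {
        toy_obj dead = {"replaced-dict", 0};   /* merged refcount hit zero */
        queue_dead_object(&dead);              /* during the pause: defer */
        /* ...world restarted... */
        for (int i = 0; i < worklist_len; i++) {
            toy_decref(worklist[i]);           /* now safe to deallocate */
        }
        return 0;
    }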
@@ -1231,7 +1246,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     }
     HEAD_UNLOCK(&_PyRuntime);
 
-    process_delayed_frees(interp);
+    process_delayed_frees(interp, state);
 
     // Find unreachable objects
     int err = deduce_unreachable_heap(interp, state);
@@ -1910,13 +1925,7 @@ PyObject_GC_Del(void *op)
     }
 
     record_deallocation(_PyThreadState_GET());
-    PyObject *self = (PyObject *)op;
-    if (_PyObject_GC_IS_SHARED_INLINE(self)) {
-        _PyObject_FreeDelayed(((char *)op)-presize);
-    }
-    else {
-        PyObject_Free(((char *)op)-presize);
-    }
+    PyObject_Free(((char *)op)-presize);
 }
 
 int