gh-112529: Implement GC for free-threaded builds (#114262)
author Sam Gross <colesbury@gmail.com>
Thu, 25 Jan 2024 18:27:36 +0000 (13:27 -0500)
committer GitHub <noreply@github.com>
Thu, 25 Jan 2024 18:27:36 +0000 (10:27 -0800)
* gh-112529: Implement GC for free-threaded builds

This implements a mark-and-sweep GC for the free-threaded builds of
CPython. The implementation relies on mimalloc to find GC-tracked
objects (i.e., "containers").
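
For orientation: the collector enumerates candidate objects by visiting
mimalloc heap blocks rather than by threading each tracked object onto
per-generation linked lists. Below is a minimal standalone sketch of that
visitor pattern, using mimalloc's public mi_heap_visit_blocks() API (the same
callback signature used by the visitors added in Python/gc_free_threading.c
in this commit); the counting heap and callback here are illustrative only,
not part of this commit:

#include <stdio.h>
#include <stdbool.h>
#include <mimalloc.h>

// Called once per heap area, and once per allocated block when the second
// argument to mi_heap_visit_blocks() is true. Return true to keep going.
static bool count_blocks(const mi_heap_t *heap, const mi_heap_area_t *area,
                         void *block, size_t block_size, void *arg)
{
    if (block == NULL) {
        return true;  // area-level callback; no block to inspect
    }
    (*(size_t *)arg)++;
    return true;
}

int main(void)
{
    mi_heap_t *heap = mi_heap_new();
    void *a = mi_heap_malloc(heap, 32);
    void *b = mi_heap_malloc(heap, 64);

    size_t n = 0;
    mi_heap_visit_blocks(heap, /* visit_all_blocks= */ true, &count_blocks, &n);
    printf("visited %zu blocks\n", n);  // expect 2

    mi_free(a);
    mi_free(b);
    mi_heap_delete(heap);
    return 0;
}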

18 files changed:
Include/internal/pycore_freelist.h
Include/internal/pycore_gc.h
Include/internal/pycore_object.h
Include/internal/pycore_object_stack.h [new file with mode: 0644]
Include/object.h
Lib/gzip.py
Lib/test/test_gc.py
Lib/test/test_io.py
Makefile.pre.in
Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst [new file with mode: 0644]
PCbuild/_freeze_module.vcxproj
PCbuild/_freeze_module.vcxproj.filters
PCbuild/pythoncore.vcxproj
PCbuild/pythoncore.vcxproj.filters
Python/gc.c
Python/gc_free_threading.c
Python/object_stack.c [new file with mode: 0644]
Python/pystate.c

diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h
index 4ab93ee2bf6c329e9a815c569adf7e8dc05c327c..dfb12839affedfcdd48aa4f82207c33d3904cc90 100644
@@ -20,6 +20,7 @@ extern "C" {
 #  define PyFloat_MAXFREELIST 100
 #  define PyContext_MAXFREELIST 255
 #  define _PyAsyncGen_MAXFREELIST 80
+#  define _PyObjectStackChunk_MAXFREELIST 4
 #else
 #  define PyTuple_NFREELISTS 0
 #  define PyTuple_MAXFREELIST 0
@@ -27,6 +28,7 @@ extern "C" {
 #  define PyFloat_MAXFREELIST 0
 #  define PyContext_MAXFREELIST 0
 #  define _PyAsyncGen_MAXFREELIST 0
+#  define _PyObjectStackChunk_MAXFREELIST 0
 #endif
 
 struct _Py_list_state {
@@ -93,6 +95,13 @@ struct _Py_async_gen_state {
 #endif
 };
 
+struct _PyObjectStackChunk;
+
+struct _Py_object_stack_state {
+    struct _PyObjectStackChunk *free_list;
+    Py_ssize_t numfree;
+};
+
 typedef struct _Py_freelist_state {
     struct _Py_float_state float_state;
     struct _Py_tuple_state tuple_state;
@@ -100,6 +109,7 @@ typedef struct _Py_freelist_state {
     struct _Py_slice_state slice_state;
     struct _Py_context_state context_state;
     struct _Py_async_gen_state async_gen_state;
+    struct _Py_object_stack_state object_stack_state;
 } _PyFreeListState;
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index d53de97709a782f4ab5d9396f322f94f2b3b0d29..b362a294a59042ae9345e71b543c59be6831b36a 100644
@@ -37,10 +37,22 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
 }
 
 
+/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */
+#ifdef Py_GIL_DISABLED
+#  define _PyGC_BITS_TRACKED        (1)
+#  define _PyGC_BITS_FINALIZED      (2)
+#  define _PyGC_BITS_UNREACHABLE    (4)
+#  define _PyGC_BITS_FROZEN         (8)
+#endif
+
 /* True if the object is currently tracked by the GC. */
 static inline int _PyObject_GC_IS_TRACKED(PyObject *op) {
+#ifdef Py_GIL_DISABLED
+    return (op->ob_gc_bits & _PyGC_BITS_TRACKED) != 0;
+#else
     PyGC_Head *gc = _Py_AS_GC(op);
     return (gc->_gc_next != 0);
+#endif
 }
 #define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op))
 
@@ -107,24 +119,29 @@ static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
     gc->_gc_prev = ((gc->_gc_prev & ~_PyGC_PREV_MASK) | uprev);
 }
 
-static inline int _PyGCHead_FINALIZED(PyGC_Head *gc) {
-    return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0);
-}
-static inline void _PyGCHead_SET_FINALIZED(PyGC_Head *gc) {
-    gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED;
-}
-
 static inline int _PyGC_FINALIZED(PyObject *op) {
+#ifdef Py_GIL_DISABLED
+    return (op->ob_gc_bits & _PyGC_BITS_FINALIZED) != 0;
+#else
     PyGC_Head *gc = _Py_AS_GC(op);
-    return _PyGCHead_FINALIZED(gc);
+    return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0);
+#endif
 }
 static inline void _PyGC_SET_FINALIZED(PyObject *op) {
+#ifdef Py_GIL_DISABLED
+    op->ob_gc_bits |= _PyGC_BITS_FINALIZED;
+#else
     PyGC_Head *gc = _Py_AS_GC(op);
-    _PyGCHead_SET_FINALIZED(gc);
+    gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED;
+#endif
 }
 static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
+#ifdef Py_GIL_DISABLED
+    op->ob_gc_bits &= ~_PyGC_BITS_FINALIZED;
+#else
     PyGC_Head *gc = _Py_AS_GC(op);
     gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED;
+#endif
 }
 
 
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index ec14c07a2991ff8c7011deedbe6c370a1c5069dd..4e52ffc77c5956b9c8aae809c870e075faf5cf8d 100644
@@ -322,6 +322,9 @@ static inline void _PyObject_GC_TRACK(
                           "object is in generation which is garbage collected",
                           filename, lineno, __func__);
 
+#ifdef Py_GIL_DISABLED
+    op->ob_gc_bits |= _PyGC_BITS_TRACKED;
+#else
     PyInterpreterState *interp = _PyInterpreterState_GET();
     PyGC_Head *generation0 = interp->gc.generation0;
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
@@ -329,6 +332,7 @@ static inline void _PyObject_GC_TRACK(
     _PyGCHead_SET_PREV(gc, last);
     _PyGCHead_SET_NEXT(gc, generation0);
     generation0->_gc_prev = (uintptr_t)gc;
+#endif
 }
 
 /* Tell the GC to stop tracking this object.
@@ -352,6 +356,9 @@ static inline void _PyObject_GC_UNTRACK(
                           "object not tracked by the garbage collector",
                           filename, lineno, __func__);
 
+#ifdef Py_GIL_DISABLED
+    op->ob_gc_bits &= ~_PyGC_BITS_TRACKED;
+#else
     PyGC_Head *gc = _Py_AS_GC(op);
     PyGC_Head *prev = _PyGCHead_PREV(gc);
     PyGC_Head *next = _PyGCHead_NEXT(gc);
@@ -359,6 +366,7 @@ static inline void _PyObject_GC_UNTRACK(
     _PyGCHead_SET_PREV(next, prev);
     gc->_gc_next = 0;
     gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+#endif
 }
 
 // Macros to accept any type for the parameter, and to automatically pass
diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h
new file mode 100644
index 0000000..1dc1c15
--- /dev/null
+++ b/Include/internal/pycore_object_stack.h
@@ -0,0 +1,84 @@
+#ifndef Py_INTERNAL_OBJECT_STACK_H
+#define Py_INTERNAL_OBJECT_STACK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+// _PyObjectStack is a stack of Python objects implemented as a linked list of
+// fixed size buffers.
+
+// Chosen so that _PyObjectStackChunk is a power-of-two size: two header
+// words plus 254 object pointers is 256 pointer-sized words, i.e. 2048
+// bytes on 64-bit builds (1024 on 32-bit).
+#define _Py_OBJECT_STACK_CHUNK_SIZE 254
+
+typedef struct _PyObjectStackChunk {
+    struct _PyObjectStackChunk *prev;
+    Py_ssize_t n;
+    PyObject *objs[_Py_OBJECT_STACK_CHUNK_SIZE];
+} _PyObjectStackChunk;
+
+typedef struct _PyObjectStack {
+    _PyObjectStackChunk *head;
+} _PyObjectStack;
+
+
+extern _PyObjectStackChunk *
+_PyObjectStackChunk_New(void);
+
+extern void
+_PyObjectStackChunk_Free(_PyObjectStackChunk *);
+
+extern void
+_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization);
+
+// Push an item onto the stack. Return -1 on allocation failure, 0 on success.
+static inline int
+_PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj)
+{
+    _PyObjectStackChunk *buf = stack->head;
+    if (buf == NULL || buf->n == _Py_OBJECT_STACK_CHUNK_SIZE) {
+        buf = _PyObjectStackChunk_New();
+        if (buf == NULL) {
+            return -1;
+        }
+        buf->prev = stack->head;
+        buf->n = 0;
+        stack->head = buf;
+    }
+
+    assert(buf->n >= 0 && buf->n < _Py_OBJECT_STACK_CHUNK_SIZE);
+    buf->objs[buf->n] = obj;
+    buf->n++;
+    return 0;
+}
+
+// Pop the top item from the stack.  Return NULL if the stack is empty.
+static inline PyObject *
+_PyObjectStack_Pop(_PyObjectStack *stack)
+{
+    _PyObjectStackChunk *buf = stack->head;
+    if (buf == NULL) {
+        return NULL;
+    }
+    assert(buf->n > 0 && buf->n <= _Py_OBJECT_STACK_CHUNK_SIZE);
+    buf->n--;
+    PyObject *obj = buf->objs[buf->n];
+    if (buf->n == 0) {
+        stack->head = buf->prev;
+        _PyObjectStackChunk_Free(buf);
+    }
+    return obj;
+}
+
+// Remove all items from the stack
+extern void
+_PyObjectStack_Clear(_PyObjectStack *stack);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // !Py_INTERNAL_OBJECT_STACK_H
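
For context, a hedged usage sketch of the stack API above. The helper below
is hypothetical (not part of this commit); it assumes a Py_BUILD_CORE
translation unit that includes pycore_object_stack.h, and its error handling
mirrors mark_reachable() in Python/gc_free_threading.c below:

// Hypothetical helper: push `n` borrowed references, unwinding on failure.
static int
push_all(_PyObjectStack *stack, PyObject **objs, Py_ssize_t n)
{
    for (Py_ssize_t i = 0; i < n; i++) {
        if (_PyObjectStack_Push(stack, objs[i]) < 0) {
            // On allocation failure, free any chunks already allocated.
            // The stack holds borrowed references, so nothing is DECREF'd.
            _PyObjectStack_Clear(stack);
            return -1;
        }
    }
    return 0;
}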
diff --git a/Include/object.h b/Include/object.h
index 48f1ddf7510887ddc390fe3b1223177b2d234259..ef3fb721c2b0122487a5bb859d94f2098cd26e22 100644
@@ -212,7 +212,9 @@ struct _object {
 struct _PyMutex { uint8_t v; };
 
 struct _object {
-    uintptr_t ob_tid;           // thread id (or zero)
+    // ob_tid stores the thread id (or zero). It is also used by the GC to
+    // store linked lists and the computed "gc_refs" refcount.
+    uintptr_t ob_tid;
     uint16_t _padding;
     struct _PyMutex ob_mutex;   // per-object lock
     uint8_t ob_gc_bits;         // gc-related state
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 177f9080dc5af8b3fce7cff5a7089234cc674b3d..fda93e0261e02858a740a133086a2c4cee0f41bc 100644
@@ -349,7 +349,7 @@ class GzipFile(_compression.BaseStream):
 
     def close(self):
         fileobj = self.fileobj
-        if fileobj is None:
+        if fileobj is None or self._buffer.closed:
             return
         try:
             if self.mode == WRITE:
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 1d71dd9e262a6ae14b77f55fdd444556e9e992c7..b01f344cb14a1a77cace0c99692aa74c193ac7ec 100644
@@ -1,7 +1,7 @@
 import unittest
 import unittest.mock
 from test.support import (verbose, refcount_test,
-                          cpython_only, requires_subprocess)
+                          cpython_only, requires_subprocess, Py_GIL_DISABLED)
 from test.support.import_helper import import_module
 from test.support.os_helper import temp_dir, TESTFN, unlink
 from test.support.script_helper import assert_python_ok, make_script
@@ -815,6 +815,15 @@ class GCTests(unittest.TestCase):
         self.assertEqual(gc.get_freeze_count(), 0)
 
     def test_get_objects(self):
+        gc.collect()
+        l = []
+        l.append(l)
+        self.assertTrue(
+                any(l is element for element in gc.get_objects())
+        )
+
+    @unittest.skipIf(Py_GIL_DISABLED, 'need generational GC')
+    def test_get_objects_generations(self):
         gc.collect()
         l = []
         l.append(l)
@@ -1225,7 +1234,7 @@ class GCCallbackTests(unittest.TestCase):
         p.stderr.close()
         # Verify that stderr has a useful error message:
         self.assertRegex(stderr,
-            br'gc\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.')
+            br'gc.*\.c:[0-9]+: .*: Assertion "gc_get_refs\(.+\) .*" failed.')
         self.assertRegex(stderr,
             br'refcount is too small')
         # "address : 0x7fb5062efc18"
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 936edea3cad70c6df7e7bd0dce3b826fc9194b47..9e28b936e00bd5748ab4bcad8f8c1a7f36a28391 100644
@@ -3652,10 +3652,8 @@ class TextIOWrapperTest(unittest.TestCase):
             codecs.lookup('utf-8')
 
             class C:
-                def __init__(self):
-                    self.buf = io.BytesIO()
                 def __del__(self):
-                    io.TextIOWrapper(self.buf, **{kwargs})
+                    io.TextIOWrapper(io.BytesIO(), **{kwargs})
                     print("ok")
             c = C()
             """.format(iomod=iomod, kwargs=kwargs)
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 21b122ae0fcd9fd0a2f3e7100223636c08842088..37a8b06987c710775f9b8696fc4b386bf0ccd90e 100644
@@ -439,6 +439,7 @@ PYTHON_OBJS=        \
                Python/modsupport.o \
                Python/mysnprintf.o \
                Python/mystrtoul.o \
+               Python/object_stack.o \
                Python/optimizer.o \
                Python/optimizer_analysis.o \
                Python/parking_lot.o \
@@ -1832,6 +1833,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/internal/pycore_frame.h \
                $(srcdir)/Include/internal/pycore_freelist.h \
                $(srcdir)/Include/internal/pycore_function.h \
+               $(srcdir)/Include/internal/pycore_gc.h \
                $(srcdir)/Include/internal/pycore_genobject.h \
                $(srcdir)/Include/internal/pycore_getopt.h \
                $(srcdir)/Include/internal/pycore_gil.h \
@@ -1853,6 +1855,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/internal/pycore_namespace.h \
                $(srcdir)/Include/internal/pycore_object.h \
                $(srcdir)/Include/internal/pycore_object_alloc.h \
+               $(srcdir)/Include/internal/pycore_object_stack.h \
                $(srcdir)/Include/internal/pycore_object_state.h \
                $(srcdir)/Include/internal/pycore_obmalloc.h \
                $(srcdir)/Include/internal/pycore_obmalloc_init.h \
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst
new file mode 100644
index 0000000..b3aa438
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst
@@ -0,0 +1,3 @@
+The free-threaded build now has its own thread-safe GC implementation that
+uses mimalloc to find GC tracked objects. It is non-generational, unlike the
+existing GC implementation.
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index 610581bc96cb1a2aad94e707916195fb287dede0..dde801fc0fd525e4714f475c5b390a3fbdb6bad7 100644
     <ClCompile Include="..\Python\modsupport.c" />
     <ClCompile Include="..\Python\mysnprintf.c" />
     <ClCompile Include="..\Python\mystrtoul.c" />
+    <ClCompile Include="..\Python\object_stack.c" />
     <ClCompile Include="..\Python\optimizer.c" />
     <ClCompile Include="..\Python\optimizer_analysis.c" />
     <ClCompile Include="..\Python\parking_lot.c" />
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 3141913c043869551cf9c79a8a33fd4aad9ee5bb..90ccb954b424bc132f0dbf2dd3c51f3b47fcf4d1 100644
     <ClCompile Include="..\Objects\object.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\object_stack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\Objects\obmalloc.c">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 57275fb2039ee0b8c7e952e297c37a7726b94de3..e0b9fc137457a0805d1f2f14fcaf683a8963822e 100644
     <ClCompile Include="..\Python\modsupport.c" />
     <ClCompile Include="..\Python\mysnprintf.c" />
     <ClCompile Include="..\Python\mystrtoul.c" />
+    <ClCompile Include="..\Python\object_stack.c" />
     <ClCompile Include="..\Python\optimizer.c" />
     <ClCompile Include="..\Python\optimizer_analysis.c" />
     <ClCompile Include="..\Python\parking_lot.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 51cbb079b5b550b75797065964b9cc1743743b16..fd79436f5add97023649ae1456f76a3108af02a3 100644
     <ClCompile Include="..\Python\mystrtoul.c">
       <Filter>Python</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\object_stack.c">
+      <Filter>Python</Filter>
+    </ClCompile>
     <ClCompile Include="..\Python\optimizer.c">
       <Filter>Python</Filter>
     </ClCompile>
diff --git a/Python/gc.c b/Python/gc.c
index 14870505ef1308e0fe1333fe9a29e150273a5c6d..466467602915264c845998ba4e9e321c6fd0ef43 100644
@@ -15,6 +15,8 @@
 #include "pycore_weakref.h"       // _PyWeakref_ClearRef()
 #include "pydtrace.h"
 
+#ifndef Py_GIL_DISABLED
+
 typedef struct _gc_runtime_state GCState;
 
 #ifdef Py_DEBUG
@@ -964,10 +966,10 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
         PyGC_Head *gc = GC_NEXT(collectable);
         PyObject *op = FROM_GC(gc);
         gc_list_move(gc, &seen);
-        if (!_PyGCHead_FINALIZED(gc) &&
+        if (!_PyGC_FINALIZED(op) &&
             (finalize = Py_TYPE(op)->tp_finalize) != NULL)
         {
-            _PyGCHead_SET_FINALIZED(gc);
+            _PyGC_SET_FINALIZED(op);
             Py_INCREF(op);
             finalize(op);
             assert(!_PyErr_Occurred(tstate));
@@ -1942,3 +1944,5 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
 done:
     gcstate->enabled = origenstate;
 }
+
+#endif  // Py_GIL_DISABLED
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 207a43b68d21f58b599416634699399e16deb1db..f2cd84981461a4f4410cc589131e35aba0acadfa 100644
+// Cyclic garbage collector implementation for the free-threaded build.
 #include "Python.h"
-#include "pycore_pystate.h"   // _PyFreeListState_GET()
-#include "pycore_tstate.h"    // _PyThreadStateImpl
+#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
+#include "pycore_context.h"
+#include "pycore_dict.h"          // _PyDict_MaybeUntrack()
+#include "pycore_initconfig.h"
+#include "pycore_interp.h"        // PyInterpreterState.gc
+#include "pycore_object.h"
+#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
+#include "pycore_object_stack.h"
+#include "pycore_pyerrors.h"
+#include "pycore_pystate.h"       // _PyThreadState_GET()
+#include "pycore_tstate.h"        // _PyThreadStateImpl
+#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
+#include "pydtrace.h"
 
 #ifdef Py_GIL_DISABLED
 
+typedef struct _gc_runtime_state GCState;
+
+#ifdef Py_DEBUG
+#  define GC_DEBUG
+#endif
+
+// Automatically choose the generation that needs collecting.
+#define GENERATION_AUTO (-1)
+
+// A linked-list of objects using the `ob_tid` field as the next pointer.
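+// Example: after worklist_push(w, a) then worklist_push(w, b), the chain is
+// w->head -> b, b->ob_tid -> a, a->ob_tid == 0 (end of list).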
+struct worklist {
+    uintptr_t head;
+};
+
+struct worklist_iter {
+    uintptr_t *ptr;   // pointer to current object
+    uintptr_t *next;  // next value of ptr
+};
+
+struct visitor_args {
+    size_t offset;  // offset of PyObject from start of block
+};
+
+// Per-collection state
+struct collection_state {
+    struct visitor_args base;
+    PyInterpreterState *interp;
+    GCState *gcstate;
+    Py_ssize_t collected;
+    Py_ssize_t uncollectable;
+    struct worklist unreachable;
+    struct worklist legacy_finalizers;
+    struct worklist wrcb_to_call;
+    struct worklist objs_to_decref;
+};
+
+// iterate over a worklist
+#define WORKSTACK_FOR_EACH(stack, op) \
+    for ((op) = (PyObject *)(stack)->head; (op) != NULL; (op) = (PyObject *)(op)->ob_tid)
+
+// iterate over a worklist with support for removing the current object
+#define WORKSTACK_FOR_EACH_ITER(stack, iter, op) \
+    for (worklist_iter_init((iter), &(stack)->head), (op) = (PyObject *)(*(iter)->ptr); \
+         (op) != NULL; \
+         worklist_iter_init((iter), (iter)->next), (op) = (PyObject *)(*(iter)->ptr))
+
+static void
+worklist_push(struct worklist *worklist, PyObject *op)
+{
+    assert(op->ob_tid == 0);
+    op->ob_tid = worklist->head;
+    worklist->head = (uintptr_t)op;
+}
+
+static PyObject *
+worklist_pop(struct worklist *worklist)
+{
+    PyObject *op = (PyObject *)worklist->head;
+    if (op != NULL) {
+        worklist->head = op->ob_tid;
+        op->ob_tid = 0;
+    }
+    return op;
+}
+
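+// Position `iter` on the object stored in *next, where `next` is either
+// &worklist->head or the previous object's ob_tid slot. iter->next caches
+// &op->ob_tid (the slot naming the following object), which lets
+// worklist_remove() unlink the current object by rewriting *iter->ptr.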
+static void
+worklist_iter_init(struct worklist_iter *iter, uintptr_t *next)
+{
+    iter->ptr = next;
+    PyObject *op = (PyObject *)*(iter->ptr);
+    if (op) {
+        iter->next = &op->ob_tid;
+    }
+}
+
+static void
+worklist_remove(struct worklist_iter *iter)
+{
+    PyObject *op = (PyObject *)*(iter->ptr);
+    *(iter->ptr) = op->ob_tid;
+    op->ob_tid = 0;
+    iter->next = iter->ptr;
+}
+
+static inline int
+gc_is_unreachable(PyObject *op)
+{
+    return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
+}
+
+static void
+gc_set_unreachable(PyObject *op)
+{
+    op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
+}
+
+static void
+gc_clear_unreachable(PyObject *op)
+{
+    op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
+}
+
+// On the first visit, mark the object as (tentatively) unreachable and zero
+// its `ob_tid` field, which the collector reuses to compute "gc_refs".
+static void
+gc_maybe_init_refs(PyObject *op)
+{
+    if (!gc_is_unreachable(op)) {
+        gc_set_unreachable(op);
+        op->ob_tid = 0;
+    }
+}
+
+static inline Py_ssize_t
+gc_get_refs(PyObject *op)
+{
+    return (Py_ssize_t)op->ob_tid;
+}
+
+static inline void
+gc_add_refs(PyObject *op, Py_ssize_t refs)
+{
+    assert(_PyObject_GC_IS_TRACKED(op));
+    op->ob_tid += refs;
+}
+
+static inline void
+gc_decref(PyObject *op)
+{
+    op->ob_tid -= 1;
+}
+
+// Merge the per-thread (biased) and shared refcounts into a single merged
+// shared count while the world is stopped.
+static void
+merge_refcount(PyObject *op, Py_ssize_t extra)
+{
+    assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
+
+    Py_ssize_t refcount = Py_REFCNT(op);
+    refcount += extra;
+
+#ifdef Py_REF_DEBUG
+    _Py_AddRefTotal(_PyInterpreterState_GET(), extra);
+#endif
+
+    // No atomics necessary; all other threads in this interpreter are paused.
+    op->ob_tid = 0;
+    op->ob_ref_local = 0;
+    op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED);
+}
+
+static void
+gc_restore_tid(PyObject *op)
+{
+    mi_segment_t *segment = _mi_ptr_segment(op);
+    if (_Py_REF_IS_MERGED(op->ob_ref_shared)) {
+        op->ob_tid = 0;
+    }
+    else {
+        // NOTE: may change ob_tid if the object was re-initialized by
+        // a different thread or its segment was abandoned and reclaimed.
+        // The segment thread id might be zero, in which case we should
+        // ensure the refcounts are now merged.
+        op->ob_tid = segment->thread_id;
+        if (op->ob_tid == 0) {
+            merge_refcount(op, 0);
+        }
+    }
+}
+
+static void
+gc_restore_refs(PyObject *op)
+{
+    if (gc_is_unreachable(op)) {
+        gc_restore_tid(op);
+        gc_clear_unreachable(op);
+    }
+}
+
+// Given a mimalloc memory block, return the PyObject stored in it, or NULL
+// if the block is not allocated, the object is not tracked, or (unless
+// include_frozen is true) the object is frozen.
+static PyObject *
+op_from_block(void *block, void *arg, bool include_frozen)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char*)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    if (!_PyObject_GC_IS_TRACKED(op)) {
+        return NULL;
+    }
+    if (!include_frozen && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) {
+        return NULL;
+    }
+    return op;
+}
+
+static int
+gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
+                         struct visitor_args *arg)
+{
+    // Offset of PyObject header from start of memory block.
+    Py_ssize_t offset_base = sizeof(PyGC_Head);
+    if (_PyMem_DebugEnabled()) {
+        // The debug allocator adds two words at the beginning of each block.
+        offset_base += 2 * sizeof(size_t);
+    }
+
+    // Objects with Py_TPFLAGS_PREHEADER have two extra fields
+    Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*);
+
+    // visit each thread's heaps for GC objects
+    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+        struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc;
+
+        arg->offset = offset_base;
+        if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true,
+                                  visitor, arg)) {
+            return -1;
+        }
+        arg->offset = offset_pre;
+        if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC_PRE], true,
+                                  visitor, arg)) {
+            return -1;
+        }
+    }
+
+    // visit blocks in the per-interpreter abandoned pool (from dead threads)
+    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
+    arg->offset = offset_base;
+    if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC, true,
+                                         visitor, arg)) {
+        return -1;
+    }
+    arg->offset = offset_pre;
+    if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC_PRE, true,
+                                         visitor, arg)) {
+        return -1;
+    }
+    return 0;
+}
+
+// Visits all GC objects in the interpreter's heaps.
+// NOTE: It is not safe to allocate or free any mimalloc managed memory while
+// this function is running.
+static int
+gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
+               struct visitor_args *arg)
+{
+    // Other threads in the interpreter must be paused so that we can safely
+    // traverse their heaps.
+    assert(interp->stoptheworld.world_stopped);
+
+    int err;
+    HEAD_LOCK(&_PyRuntime);
+    err = gc_visit_heaps_lock_held(interp, visitor, arg);
+    HEAD_UNLOCK(&_PyRuntime);
+    return err;
+}
+
+// Subtract an incoming reference from the computed "gc_refs" refcount.
+static int
+visit_decref(PyObject *op, void *arg)
+{
+    if (_PyObject_GC_IS_TRACKED(op) && !_Py_IsImmortal(op)) {
+        // If update_refs hasn't reached this object yet, mark it
+        // as (tentatively) unreachable and initialize ob_tid to zero.
+        gc_maybe_init_refs(op);
+        gc_decref(op);
+    }
+    return 0;
+}
+
+// Compute the number of external references to objects in the heap
+// by subtracting internal references from the refcount. The difference is
+// computed in the ob_tid field (we restore it later).
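+//
+// Example: a self-referential list (l = []; l.append(l); del l) enters this
+// pass with refcount 1. update_refs adds 1 to its gc_refs; the tp_traverse
+// pass subtracts 1 for the internal reference, leaving gc_refs == 0, i.e.
+// nothing outside the GC-tracked heap keeps it alive.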
+static bool
+update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
+            void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    // Exclude immortal objects from garbage collection
+    if (_Py_IsImmortal(op)) {
+        op->ob_tid = 0;
+        _PyObject_GC_UNTRACK(op);
+        gc_clear_unreachable(op);
+        return true;
+    }
+
+    // Untrack tuples and dicts as necessary in this pass.
+    if (PyTuple_CheckExact(op)) {
+        _PyTuple_MaybeUntrack(op);
+        if (!_PyObject_GC_IS_TRACKED(op)) {
+            gc_restore_refs(op);
+            return true;
+        }
+    }
+    else if (PyDict_CheckExact(op)) {
+        _PyDict_MaybeUntrack(op);
+        if (!_PyObject_GC_IS_TRACKED(op)) {
+            gc_restore_refs(op);
+            return true;
+        }
+    }
+
+    Py_ssize_t refcount = Py_REFCNT(op);
+    _PyObject_ASSERT(op, refcount >= 0);
+
+    // Add the actual refcount to ob_tid.
+    gc_maybe_init_refs(op);
+    gc_add_refs(op, refcount);
+
+    // Subtract internal references from ob_tid. Objects with ob_tid > 0
+    // are directly reachable from outside containers, and so can't be
+    // collected.
+    Py_TYPE(op)->tp_traverse(op, visit_decref, NULL);
+    return true;
+}
+
+static int
+visit_clear_unreachable(PyObject *op, _PyObjectStack *stack)
+{
+    if (gc_is_unreachable(op)) {
+        _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op));
+        gc_clear_unreachable(op);
+        return _PyObjectStack_Push(stack, op);
+    }
+    return 0;
+}
+
+// Transitively clear the unreachable bit on all objects reachable from op.
+static int
+mark_reachable(PyObject *op)
+{
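+    // Iterative traversal using an explicit object stack rather than C
+    // recursion, so deeply nested object graphs cannot overflow the C stack.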
+    _PyObjectStack stack = { NULL };
+    do {
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        if (traverse(op, (visitproc)&visit_clear_unreachable, &stack) < 0) {
+            _PyObjectStack_Clear(&stack);
+            return -1;
+        }
+        op = _PyObjectStack_Pop(&stack);
+    } while (op != NULL);
+    return 0;
+}
+
+#ifdef GC_DEBUG
+static bool
+validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
+                    void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    _PyObject_ASSERT(op, gc_is_unreachable(op));
+    _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
+                                  "refcount is too small");
+    return true;
+}
+#endif
+
+static bool
+mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
+                  void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    if (gc_is_unreachable(op) && gc_get_refs(op) != 0) {
+        // Object is reachable but currently marked as unreachable.
+        // Mark it as reachable and traverse its pointers to find
+        // any other object that may be directly reachable from it.
+        gc_clear_unreachable(op);
+
+        // Transitively mark reachable objects by clearing the unreachable flag.
+        if (mark_reachable(op) < 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Return true if object has a pre-PEP 442 finalization method. */
+static int
+has_legacy_finalizer(PyObject *op)
+{
+    return Py_TYPE(op)->tp_del != NULL;
+}
+
+static bool
+scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
+                  void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    struct collection_state *state = (struct collection_state *)args;
+    if (gc_is_unreachable(op)) {
+        // Merge and add one to the refcount to prevent deallocation while we
+        // are holding on to it in a worklist.
+        merge_refcount(op, 1);
+
+        if (has_legacy_finalizer(op)) {
+            // would be unreachable, but has legacy finalizer
+            gc_clear_unreachable(op);
+            worklist_push(&state->legacy_finalizers, op);
+        }
+        else {
+            worklist_push(&state->unreachable, op);
+        }
+    }
+    else {
+        // object is reachable, restore `ob_tid`; we're done with these objects
+        gc_restore_tid(op);
+        state->gcstate->long_lived_total++;
+    }
+
+    return true;
+}
+
+static int
+move_legacy_finalizer_reachable(struct collection_state *state);
+
+static int
+deduce_unreachable_heap(PyInterpreterState *interp,
+                        struct collection_state *state)
+{
+    // Identify objects that are directly reachable from outside the GC heap
+    // by computing the difference between the refcount and the number of
+    // incoming references.
+    gc_visit_heaps(interp, &update_refs, &state->base);
+
+#ifdef GC_DEBUG
+    // Check that all objects are marked as unreachable and that the computed
+    // reference count difference (stored in `ob_tid`) is non-negative.
+    gc_visit_heaps(interp, &validate_gc_objects, &state->base);
+#endif
+
+    // Transitively mark reachable objects by clearing the
+    // _PyGC_BITS_UNREACHABLE flag.
+    if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) {
+        return -1;
+    }
+
+    // Identify remaining unreachable objects and push them onto a stack.
+    // Restores ob_tid for reachable objects.
+    gc_visit_heaps(interp, &scan_heap_visitor, &state->base);
+
+    if (state->legacy_finalizers.head) {
+        // There may be objects reachable from legacy finalizers that are in
+        // the unreachable set. We need to mark them as reachable.
+        if (move_legacy_finalizer_reachable(state) < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int
+move_legacy_finalizer_reachable(struct collection_state *state)
+{
+    // Clear the reachable bit on all objects transitively reachable
+    // from the objects with legacy finalizers.
+    PyObject *op;
+    WORKSTACK_FOR_EACH(&state->legacy_finalizers, op) {
+        if (mark_reachable(op) < 0) {
+            return -1;
+        }
+    }
+
+    // Move the reachable objects from the unreachable worklist to the legacy
+    // finalizer worklist.
+    struct worklist_iter iter;
+    WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
+        if (!gc_is_unreachable(op)) {
+            worklist_remove(&iter);
+            worklist_push(&state->legacy_finalizers, op);
+        }
+    }
+
+    return 0;
+}
+
+// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are
+// enqueued in `wrcb_to_call`, but not invoked yet.
+static void
+clear_weakrefs(struct collection_state *state)
+{
+    PyObject *op;
+    WORKSTACK_FOR_EACH(&state->unreachable, op) {
+        if (PyWeakref_Check(op)) {
+            // Clear weakrefs that are themselves unreachable to ensure their
+            // callbacks will not be executed later from a `tp_clear()`
+            // inside delete_garbage(). That would be unsafe: it could
+            // resurrect a dead object or access an already cleared object.
+            // See bpo-38006 for one example.
+            _PyWeakref_ClearRef((PyWeakReference *)op);
+        }
+
+        if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
+            continue;
+        }
+
+        // NOTE: This is never triggered for static types so we can avoid the
+        // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
+        PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
+
+        // `op` may have some weakrefs.  March over the list, clear
+        // all the weakrefs, and enqueue the weakrefs with callbacks
+        // that must be called into wrcb_to_call.
+        for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
+            // _PyWeakref_ClearRef clears the weakref but leaves
+            // the callback pointer intact.  Obscure: it also
+            // changes *wrlist.
+            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
+            _PyWeakref_ClearRef(wr);
+            _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
+
+            // We do not invoke callbacks for weakrefs that are themselves
+            // unreachable. This is partly for historical reasons: weakrefs
+            // predate safe object finalization, and a weakref that is itself
+            // unreachable may have a callback that resurrects other
+            // unreachable objects.
+            if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) {
+                continue;
+            }
+
+            // Create a new reference so that wr can't go away before we can
+            // process it again.
+            merge_refcount((PyObject *)wr, 1);
+
+            // Enqueue weakref to be called later.
+            worklist_push(&state->wrcb_to_call, (PyObject *)wr);
+        }
+    }
+}
+
+static void
+call_weakref_callbacks(struct collection_state *state)
+{
+    // Invoke the callbacks we decided to honor.
+    PyObject *op;
+    while ((op = worklist_pop(&state->wrcb_to_call)) != NULL) {
+        _PyObject_ASSERT(op, PyWeakref_Check(op));
+
+        PyWeakReference *wr = (PyWeakReference *)op;
+        PyObject *callback = wr->wr_callback;
+        _PyObject_ASSERT(op, callback != NULL);
+
+        /* copy-paste of weakrefobject.c's handle_callback() */
+        PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr);
+        if (temp == NULL) {
+            PyErr_WriteUnraisable(callback);
+        }
+        else {
+            Py_DECREF(temp);
+        }
+
+        gc_restore_tid(op);
+        Py_DECREF(op);  // drop worklist reference
+    }
+}
+
+
+static GCState *
+get_gc_state(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    return &interp->gc;
+}
+
+
+void
+_PyGC_InitState(GCState *gcstate)
+{
+}
+
+
+PyStatus
+_PyGC_Init(PyInterpreterState *interp)
+{
+    GCState *gcstate = &interp->gc;
+
+    gcstate->garbage = PyList_New(0);
+    if (gcstate->garbage == NULL) {
+        return _PyStatus_NO_MEMORY();
+    }
+
+    gcstate->callbacks = PyList_New(0);
+    if (gcstate->callbacks == NULL) {
+        return _PyStatus_NO_MEMORY();
+    }
+
+    return _PyStatus_OK();
+}
+
+static void
+debug_cycle(const char *msg, PyObject *op)
+{
+    PySys_FormatStderr("gc: %s <%s %p>\n",
+                       msg, Py_TYPE(op)->tp_name, op);
+}
+
+/* Run first-time finalizers (if any) on all the objects in collectable.
+ * Note that this may remove some (or even all) of the objects from the
+ * list, due to refcounts falling to 0.
+ */
+static void
+finalize_garbage(struct collection_state *state)
+{
+    // NOTE: the unreachable worklist holds a strong reference to the object
+    // to prevent it from being deallocated while we are holding on to it.
+    PyObject *op;
+    WORKSTACK_FOR_EACH(&state->unreachable, op) {
+        if (!_PyGC_FINALIZED(op)) {
+            destructor finalize = Py_TYPE(op)->tp_finalize;
+            if (finalize != NULL) {
+                _PyGC_SET_FINALIZED(op);
+                finalize(op);
+                assert(!_PyErr_Occurred(_PyThreadState_GET()));
+            }
+        }
+    }
+}
+
+// Break reference cycles by clearing the containers involved.
+static void
+delete_garbage(struct collection_state *state)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    GCState *gcstate = state->gcstate;
+
+    assert(!_PyErr_Occurred(tstate));
+
+    PyObject *op;
+    while ((op = worklist_pop(&state->objs_to_decref)) != NULL) {
+        Py_DECREF(op);
+    }
+
+    while ((op = worklist_pop(&state->unreachable)) != NULL) {
+        _PyObject_ASSERT(op, gc_is_unreachable(op));
+
+        // Clear the unreachable flag.
+        gc_clear_unreachable(op);
+
+        if (!_PyObject_GC_IS_TRACKED(op)) {
+            // Object might have been untracked by some other tp_clear() call.
+            Py_DECREF(op);  // drop the reference from the worklist
+            continue;
+        }
+
+        state->collected++;
+
+        if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
+            assert(gcstate->garbage != NULL);
+            if (PyList_Append(gcstate->garbage, op) < 0) {
+                _PyErr_Clear(tstate);
+            }
+        }
+        else {
+            inquiry clear = Py_TYPE(op)->tp_clear;
+            if (clear != NULL) {
+                (void) clear(op);
+                if (_PyErr_Occurred(tstate)) {
+                    PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
+                                           Py_TYPE(op)->tp_name);
+                }
+            }
+        }
+
+        Py_DECREF(op);  // drop the reference from the worklist
+    }
+}
+
+static void
+handle_legacy_finalizers(struct collection_state *state)
+{
+    GCState *gcstate = state->gcstate;
+    assert(gcstate->garbage != NULL);
+
+    PyObject *op;
+    while ((op = worklist_pop(&state->legacy_finalizers)) != NULL) {
+        state->uncollectable++;
+
+        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
+            debug_cycle("uncollectable", op);
+        }
+
+        if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(gcstate->garbage, op) < 0) {
+                PyErr_Clear();
+            }
+        }
+        Py_DECREF(op);  // drop worklist reference
+    }
+}
+
+// Show stats for objects in each generation.
+static void
+show_stats_each_generations(GCState *gcstate)
+{
+    // TODO
+}
+
+// Traversal callback for handle_resurrected_objects.
+static int
+visit_decref_unreachable(PyObject *op, void *data)
+{
+    if (gc_is_unreachable(op) && _PyObject_GC_IS_TRACKED(op)) {
+        op->ob_ref_local -= 1;
+    }
+    return 0;
+}
+
+// Handle objects that may have resurrected after a call to 'finalize_garbage'.
+static int
+handle_resurrected_objects(struct collection_state *state)
+{
+    // First, find externally reachable objects by computing the reference
+    // count difference in ob_ref_local. We can't use ob_tid here because
+    // that's already used to store the unreachable worklist.
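+    // This mirrors update_refs()/visit_decref(), with the difference
+    // accumulated in ob_ref_local: add the merged refcount, then subtract
+    // one for each reference from another unreachable object (plus one for
+    // the worklist's own reference).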
+    PyObject *op;
+    struct worklist_iter iter;
+    WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
+        assert(gc_is_unreachable(op));
+        assert(_Py_REF_IS_MERGED(op->ob_ref_shared));
+
+        if (!_PyObject_GC_IS_TRACKED(op)) {
+            // Object was untracked by a finalizer. Schedule it for a Py_DECREF
+            // after we finish with the stop-the-world pause.
+            gc_clear_unreachable(op);
+            worklist_remove(&iter);
+            worklist_push(&state->objs_to_decref, op);
+            continue;
+        }
+
+        Py_ssize_t refcount = (op->ob_ref_shared >> _Py_REF_SHARED_SHIFT);
+        if (refcount > INT32_MAX) {
+            // The refcount is too big to fit in `ob_ref_local`. Mark the
+            // object as immortal and bail out.
+            gc_clear_unreachable(op);
+            worklist_remove(&iter);
+            _Py_SetImmortal(op);
+            continue;
+        }
+
+        op->ob_ref_local += (uint32_t)refcount;
+
+        // Subtract one to account for the reference from the worklist.
+        op->ob_ref_local -= 1;
+
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        (void) traverse(op,
+            (visitproc)visit_decref_unreachable,
+            NULL);
+    }
+
+    // Find resurrected objects
+    bool any_resurrected = false;
+    WORKSTACK_FOR_EACH(&state->unreachable, op) {
+        int32_t gc_refs = (int32_t)op->ob_ref_local;
+        op->ob_ref_local = 0;  // restore ob_ref_local
+
+        _PyObject_ASSERT(op, gc_refs >= 0);
+
+        if (gc_is_unreachable(op) && gc_refs > 0) {
+            // Clear the unreachable flag on any transitively reachable objects
+            // from this one.
+            any_resurrected = true;
+            gc_clear_unreachable(op);
+            if (mark_reachable(op) < 0) {
+                return -1;
+            }
+        }
+    }
+
+    if (any_resurrected) {
+        // Remove resurrected objects from the unreachable list.
+        WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
+            if (!gc_is_unreachable(op)) {
+                _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
+                worklist_remove(&iter);
+                merge_refcount(op, -1);  // remove worklist reference
+            }
+        }
+    }
+
+#ifdef GC_DEBUG
+    WORKSTACK_FOR_EACH(&state->unreachable, op) {
+        _PyObject_ASSERT(op, gc_is_unreachable(op));
+        _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op));
+        _PyObject_ASSERT(op, op->ob_ref_local == 0);
+        _PyObject_ASSERT(op, _Py_REF_IS_MERGED(op->ob_ref_shared));
+    }
+#endif
+
+    return 0;
+}
+
+
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+invoke_gc_callback(PyThreadState *tstate, const char *phase,
+                   int generation, Py_ssize_t collected,
+                   Py_ssize_t uncollectable)
+{
+    assert(!_PyErr_Occurred(tstate));
+
+    /* we may get called very early */
+    GCState *gcstate = &tstate->interp->gc;
+    if (gcstate->callbacks == NULL) {
+        return;
+    }
+
+    /* The local variable cannot be rebound, check it for sanity */
+    assert(PyList_CheckExact(gcstate->callbacks));
+    PyObject *info = NULL;
+    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
+        info = Py_BuildValue("{sisnsn}",
+            "generation", generation,
+            "collected", collected,
+            "uncollectable", uncollectable);
+        if (info == NULL) {
+            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+            return;
+        }
+    }
+
+    PyObject *phase_obj = PyUnicode_FromString(phase);
+    if (phase_obj == NULL) {
+        Py_XDECREF(info);
+        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+        return;
+    }
+
+    PyObject *stack[] = {phase_obj, info};
+    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
+        Py_INCREF(cb); /* make sure cb doesn't go away */
+        r = PyObject_Vectorcall(cb, stack, 2, NULL);
+        if (r == NULL) {
+            PyErr_WriteUnraisable(cb);
+        }
+        else {
+            Py_DECREF(r);
+        }
+        Py_DECREF(cb);
+    }
+    Py_DECREF(phase_obj);
+    Py_XDECREF(info);
+    assert(!_PyErr_Occurred(tstate));
+}
+
+
+/* Find the oldest generation (highest numbered) where the count
+ * exceeds the threshold.  Objects in that generation and
+ * generations younger than it will be collected. */
+static int
+gc_select_generation(GCState *gcstate)
+{
+    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
+        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
+            /* Avoid quadratic performance degradation in number
+               of tracked objects (see also issue #4074):
+
+               To limit the cost of garbage collection, there are two strategies:
+                 - make each collection faster, e.g. by scanning fewer objects
+                 - do fewer collections
+               This heuristic is about the latter strategy.
+
+               In addition to the various configurable thresholds, we only trigger a
+               full collection if the ratio
+
+                long_lived_pending / long_lived_total
+
+               is above a given value (hardwired to 25%).
+
+               The reason is that, while "non-full" collections (i.e., collections of
+               the young and middle generations) will always examine roughly the same
+               number of objects -- determined by the aforementioned thresholds --,
+               the cost of a full collection is proportional to the total number of
+               long-lived objects, which is virtually unbounded.
+
+               Indeed, it has been remarked that doing a full collection every
+               <constant number> of object creations entails a dramatic performance
+               degradation in workloads that consist of creating and storing lots of
+               long-lived objects (e.g. building a large list of GC-tracked objects would
+               show quadratic performance, instead of linear as expected: see issue #4074).
+
+               Using the above ratio, instead, yields amortized linear performance in
+               the total number of objects (the effect of which can be summarized
+               thusly: "each full garbage collection is more and more costly as the
+               number of objects grows, but we do fewer and fewer of them").
+
+               This heuristic was suggested by Martin von Löwis on python-dev in
+               June 2008. His original analysis and proposal can be found at:
+               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+            */
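+            // Example: with long_lived_total == 1,000,000, a full collection
+            // runs only once long_lived_pending reaches 250,000 (a quarter
+            // of the long-lived total).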
+            if (i == NUM_GENERATIONS - 1
+                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
+            {
+                continue;
+            }
+            return i;
+        }
+    }
+    return -1;
+}
+
+static void
+cleanup_worklist(struct worklist *worklist)
+{
+    PyObject *op;
+    while ((op = worklist_pop(worklist)) != NULL) {
+        gc_restore_tid(op);
+        gc_clear_unreachable(op);
+        Py_DECREF(op);
+    }
+}
+
+static void
+gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
+{
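+    // Overall flow: (1) stop the world and compute the unreachable set,
+    // (2) clear weakrefs and queue their callbacks, (3) restart the world,
+    // then run weakref callbacks and finalizers, (4) stop the world again
+    // to handle objects resurrected by finalizers, (5) break cycles with
+    // tp_clear and report objects with legacy finalizers via gc.garbage.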
+    _PyEval_StopTheWorld(interp);
+    // Find unreachable objects
+    int err = deduce_unreachable_heap(interp, state);
+    if (err < 0) {
+        _PyEval_StartTheWorld(interp);
+        goto error;
+    }
+
+    // Print debugging information.
+    if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
+        PyObject *op;
+        WORKSTACK_FOR_EACH(&state->unreachable, op) {
+            debug_cycle("collectable", op);
+        }
+    }
+
+    // Clear weakrefs and enqueue callbacks (but do not call them).
+    clear_weakrefs(state);
+    _PyEval_StartTheWorld(interp);
+
+    // Call weakref callbacks and finalizers after unpausing other threads to
+    // avoid potential deadlocks.
+    call_weakref_callbacks(state);
+    finalize_garbage(state);
+
+    // Handle any objects that may have resurrected after the finalization.
+    _PyEval_StopTheWorld(interp);
+    err = handle_resurrected_objects(state);
+    _PyEval_StartTheWorld(interp);
+
+    if (err < 0) {
+        goto error;
+    }
+
+    // Call tp_clear on objects in the unreachable set. This will cause
+    // the reference cycles to be broken. It may also cause some objects
+    // to be freed.
+    delete_garbage(state);
+
+    // Append objects with legacy finalizers to the "gc.garbage" list.
+    handle_legacy_finalizers(state);
+    return;
+
+error:
+    cleanup_worklist(&state->unreachable);
+    cleanup_worklist(&state->legacy_finalizers);
+    cleanup_worklist(&state->wrcb_to_call);
+    cleanup_worklist(&state->objs_to_decref);
+    PyErr_NoMemory();
+    PyErr_FormatUnraisable("Out of memory during garbage collection");
+}
+
+/* This is the main function.  Read this to understand how the
+ * collection process works. */
+static Py_ssize_t
+gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+{
+    int i;
+    Py_ssize_t m = 0; /* # objects collected */
+    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
+    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
+    GCState *gcstate = &tstate->interp->gc;
+
+    // gc_collect_main() must not be called before _PyGC_Init
+    // or after _PyGC_Fini()
+    assert(gcstate->garbage != NULL);
+    assert(!_PyErr_Occurred(tstate));
+
+    int expected = 0;
+    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
+        // Don't start a garbage collection if one is already in progress.
+        return 0;
+    }
+
+    if (generation == GENERATION_AUTO) {
+        // Select the oldest generation that needs collecting. We will collect
+        // objects from that generation and all generations younger than it.
+        generation = gc_select_generation(gcstate);
+        if (generation < 0) {
+            // No generation needs to be collected.
+            _Py_atomic_store_int(&gcstate->collecting, 0);
+            return 0;
+        }
+    }
+
+    assert(generation >= 0 && generation < NUM_GENERATIONS);
+
+#ifdef Py_STATS
+    if (_Py_stats) {
+        _Py_stats->object_stats.object_visits = 0;
+    }
+#endif
+    GC_STAT_ADD(generation, collections, 1);
+
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(tstate, "start", generation, 0, 0);
+    }
+
+    if (gcstate->debug & _PyGC_DEBUG_STATS) {
+        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
+        show_stats_each_generations(gcstate);
+        t1 = _PyTime_GetPerfCounter();
+    }
+
+    if (PyDTrace_GC_START_ENABLED()) {
+        PyDTrace_GC_START(generation);
+    }
+
+    /* update collection and allocation counters */
+    if (generation+1 < NUM_GENERATIONS) {
+        gcstate->generations[generation+1].count += 1;
+    }
+    for (i = 0; i <= generation; i++) {
+        gcstate->generations[i].count = 0;
+    }
+
+    PyInterpreterState *interp = tstate->interp;
+
+    struct collection_state state = {
+        .interp = interp,
+        .gcstate = gcstate,
+    };
+
+    gc_collect_internal(interp, &state);
+
+    m = state.collected;
+    n = state.uncollectable;
+
+    if (gcstate->debug & _PyGC_DEBUG_STATS) {
+        double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
+        PySys_WriteStderr(
+            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
+            n+m, n, d);
+    }
+
+    // Clear free lists in all threads
+    _PyGC_ClearAllFreeLists(interp);
+
+    if (_PyErr_Occurred(tstate)) {
+        if (reason == _Py_GC_REASON_SHUTDOWN) {
+            _PyErr_Clear(tstate);
+        }
+        else {
+            PyErr_FormatUnraisable("Exception ignored in garbage collection");
+        }
+    }
+
+    /* Update stats */
+    struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
+    stats->collections++;
+    stats->collected += m;
+    stats->uncollectable += n;
+
+    GC_STAT_ADD(generation, objects_collected, m);
+#ifdef Py_STATS
+    if (_Py_stats) {
+        GC_STAT_ADD(generation, object_visits,
+            _Py_stats->object_stats.object_visits);
+        _Py_stats->object_stats.object_visits = 0;
+    }
+#endif
+
+    if (PyDTrace_GC_DONE_ENABLED()) {
+        PyDTrace_GC_DONE(n + m);
+    }
+
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(tstate, "stop", generation, m, n);
+    }
+
+    assert(!_PyErr_Occurred(tstate));
+    _Py_atomic_store_int(&gcstate->collecting, 0);
+    return n + m;
+}
+
+struct get_referrers_args {
+    struct visitor_args base;
+    PyObject *objs;
+    struct worklist results;
+};
+
+static int
+referrersvisit(PyObject* obj, void *arg)
+{
+    PyObject *objs = arg;
+    Py_ssize_t i;
+    for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
+        if (PyTuple_GET_ITEM(objs, i) == obj) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static bool
+visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area,
+                    void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, true);
+    if (op == NULL) {
+        return true;
+    }
+
+    struct get_referrers_args *arg = (struct get_referrers_args *)args;
+    if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) {
+        op->ob_tid = 0;  // we will restore ob_tid later
+        worklist_push(&arg->results, op);
+    }
+
+    return true;
+}
+
+PyObject *
+_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
+{
+    PyObject *result = PyList_New(0);
+    if (!result) {
+        return NULL;
+    }
+
+    _PyEval_StopTheWorld(interp);
+
+    // Append all objects to a worklist. This abuses ob_tid. We will restore
+    // it later. NOTE: We can't append to the PyListObject during
+    // gc_visit_heaps() because PyList_Append() may reclaim abandoned
+    // mimalloc segments while we are traversing them.
+    struct get_referrers_args args = { .objs = objs };
+    gc_visit_heaps(interp, &visit_get_referrers, &args.base);
+
+    bool error = false;
+    PyObject *op;
+    while ((op = worklist_pop(&args.results)) != NULL) {
+        gc_restore_tid(op);
+        if (op != objs && PyList_Append(result, op) < 0) {
+            error = true;
+            break;
+        }
+    }
+
+    // In case of error, clear the remaining worklist
+    while ((op = worklist_pop(&args.results)) != NULL) {
+        gc_restore_tid(op);
+    }
+
+    _PyEval_StartTheWorld(interp);
+
+    if (error) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    return result;
+}
+
+struct get_objects_args {
+    struct visitor_args base;
+    struct worklist objects;
+};
+
+static bool
+visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
+                  void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, true);
+    if (op == NULL) {
+        return true;
+    }
+
+    struct get_objects_args *arg = (struct get_objects_args *)args;
+    op->ob_tid = 0;  // we will restore ob_tid later
+    worklist_push(&arg->objects, op);
+
+    return true;
+}
+
+PyObject *
+_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
+{
+    PyObject *result = PyList_New(0);
+    if (!result) {
+        return NULL;
+    }
+
+    _PyEval_StopTheWorld(interp);
+
+    // Append all objects to a worklist. This abuses ob_tid. We will restore
+    // it later. NOTE: We can't append to the list during gc_visit_heaps()
+    // because PyList_Append() may reclaim an abandoned mimalloc segment
+    // while we are traversing it.
+    struct get_objects_args args = { 0 };
+    gc_visit_heaps(interp, &visit_get_objects, &args.base);
+
+    bool error = false;
+    PyObject *op;
+    while ((op = worklist_pop(&args.objects)) != NULL) {
+        gc_restore_tid(op);
+        if (op != result && PyList_Append(result, op) < 0) {
+            error = true;
+            break;
+        }
+    }
+
+    // In case of error, clear the remaining worklist
+    while ((op = worklist_pop(&args.objects)) != NULL) {
+        gc_restore_tid(op);
+    }
+
+    _PyEval_StartTheWorld(interp);
+
+    if (error) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    return result;
+}
+
+static bool
+visit_freeze(const mi_heap_t *heap, const mi_heap_area_t *area,
+             void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, true);
+    if (op != NULL) {
+        op->ob_gc_bits |= _PyGC_BITS_FROZEN;
+    }
+    return true;
+}
+
+void
+_PyGC_Freeze(PyInterpreterState *interp)
+{
+    struct visitor_args args;
+    _PyEval_StopTheWorld(interp);
+    gc_visit_heaps(interp, &visit_freeze, &args);
+    _PyEval_StartTheWorld(interp);
+}
+
+static bool
+visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area,
+               void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, true);
+    if (op != NULL) {
+        op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
+    }
+    return true;
+}
+
+void
+_PyGC_Unfreeze(PyInterpreterState *interp)
+{
+    struct visitor_args args;
+    _PyEval_StopTheWorld(interp);
+    gc_visit_heaps(interp, &visit_unfreeze, &args);
+    _PyEval_StartTheWorld(interp);
+}
+
+struct count_frozen_args {
+    struct visitor_args base;
+    Py_ssize_t count;
+};
+
+static bool
+visit_count_frozen(const mi_heap_t *heap, const mi_heap_area_t *area,
+                   void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, true);
+    if (op != NULL && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) {
+        struct count_frozen_args *arg = (struct count_frozen_args *)args;
+        arg->count++;
+    }
+    return true;
+}
+
+Py_ssize_t
+_PyGC_GetFreezeCount(PyInterpreterState *interp)
+{
+    struct count_frozen_args args = { .count = 0 };
+    _PyEval_StopTheWorld(interp);
+    gc_visit_heaps(interp, &visit_count_frozen, &args.base);
+    _PyEval_StartTheWorld(interp);
+    return args.count;
+}
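Editor's note: these three functions back gc.freeze(), gc.unfreeze(), and gc.get_freeze_count(). A hedged sketch of the classic use case, excluding all currently live objects from collection before fork() in a pre-loading server:

    PyInterpreterState *interp = PyInterpreterState_Get();
    _PyGC_Freeze(interp);                              // sets _PyGC_BITS_FROZEN on every object
    Py_ssize_t frozen = _PyGC_GetFreezeCount(interp);  // how many objects carry the bit
    // ... fork() worker processes here ...
    _PyGC_Unfreeze(interp);                            // make them collectable again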
+
+/* C API for controlling the state of the garbage collector */
+int
+PyGC_Enable(void)
+{
+    GCState *gcstate = get_gc_state();
+    int old_state = gcstate->enabled;
+    gcstate->enabled = 1;
+    return old_state;
+}
+
+int
+PyGC_Disable(void)
+{
+    GCState *gcstate = get_gc_state();
+    int old_state = gcstate->enabled;
+    gcstate->enabled = 0;
+    return old_state;
+}
+
+int
+PyGC_IsEnabled(void)
+{
+    GCState *gcstate = get_gc_state();
+    return gcstate->enabled;
+}
+
+/* Public API to invoke gc.collect() from C */
+Py_ssize_t
+PyGC_Collect(void)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    GCState *gcstate = &tstate->interp->gc;
+
+    if (!gcstate->enabled) {
+        return 0;
+    }
+
+    Py_ssize_t n;
+    PyObject *exc = _PyErr_GetRaisedException(tstate);
+    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
+    _PyErr_SetRaisedException(tstate, exc);
+
+    return n;
+}
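Editor's note: because PyGC_Disable() and PyGC_Enable() return the previous state, a caller can bracket a critical section without clobbering the user's gc.enable()/gc.disable() choice. A small sketch (build_big_graph() is a hypothetical helper):

    int was_enabled = PyGC_Disable();       // returns the old enabled flag
    build_big_graph();                      // allocate many containers without GC pauses
    if (was_enabled) {
        PyGC_Enable();
    }
    Py_ssize_t collected = PyGC_Collect();  // run a full collection; returns 0 if GC is disabled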
+
+Py_ssize_t
+_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+{
+    return gc_collect_main(tstate, generation, reason);
+}
+
+Py_ssize_t
+_PyGC_CollectNoFail(PyThreadState *tstate)
+{
+    /* Ideally, this function is only called on interpreter shutdown,
+       and therefore not recursively.  Unfortunately, when there are daemon
+       threads, a daemon thread can start a cyclic garbage collection
+       during interpreter shutdown (and then never finish it).
+       See http://bugs.python.org/issue8713#msg195178 for an example.
+       */
+    return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
+}
+
+void
+_PyGC_DumpShutdownStats(PyInterpreterState *interp)
+{
+    GCState *gcstate = &interp->gc;
+    if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
+        && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
+        const char *message;
+        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
+            message = "gc: %zd uncollectable objects at shutdown";
+        }
+        else {
+            message = "gc: %zd uncollectable objects at shutdown; " \
+                "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
+        }
+        /* PyErr_WarnFormat() does too many things, and we are at shutdown:
+           the warnings module's dependencies (e.g. linecache) may already
+           be gone. */
+        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
+                                     "gc", NULL, message,
+                                     PyList_GET_SIZE(gcstate->garbage)))
+        {
+            PyErr_WriteUnraisable(NULL);
+        }
+        if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
+            PyObject *repr = NULL, *bytes = NULL;
+            repr = PyObject_Repr(gcstate->garbage);
+            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
+                PyErr_WriteUnraisable(gcstate->garbage);
+            }
+            else {
+                PySys_WriteStderr(
+                    "      %s\n",
+                    PyBytes_AS_STRING(bytes)
+                    );
+            }
+            Py_XDECREF(repr);
+            Py_XDECREF(bytes);
+        }
+    }
+}
+
+
+void
+_PyGC_Fini(PyInterpreterState *interp)
+{
+    GCState *gcstate = &interp->gc;
+    Py_CLEAR(gcstate->garbage);
+    Py_CLEAR(gcstate->callbacks);
+
+    /* We expect that none of this interpreter's objects are shared
+       with other interpreters.
+       See https://github.com/python/cpython/issues/90228. */
+}
+
+/* for debugging */
+
+#ifdef Py_DEBUG
+static int
+visit_validate(PyObject *op, void *parent_raw)
+{
+    PyObject *parent = _PyObject_CAST(parent_raw);
+    if (_PyObject_IsFreed(op)) {
+        _PyObject_ASSERT_FAILED_MSG(parent,
+                                    "PyObject_GC_Track() object is not valid");
+    }
+    return 0;
+}
+#endif
+
+
+/* extension modules might be compiled with GC support so these
+   functions must always be available */
+
+void
+PyObject_GC_Track(void *op_raw)
+{
+    PyObject *op = _PyObject_CAST(op_raw);
+    if (_PyObject_GC_IS_TRACKED(op)) {
+        _PyObject_ASSERT_FAILED_MSG(op,
+                                    "object already tracked "
+                                    "by the garbage collector");
+    }
+    _PyObject_GC_TRACK(op);
+
+#ifdef Py_DEBUG
+    /* Check that the object is valid: validate objects traversed
+       by tp_traverse() */
+    traverseproc traverse = Py_TYPE(op)->tp_traverse;
+    (void)traverse(op, visit_validate, op);
+#endif
+}
+
+void
+PyObject_GC_UnTrack(void *op_raw)
+{
+    PyObject *op = _PyObject_CAST(op_raw);
+    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
+     * call PyObject_GC_UnTrack twice on an object.
+     */
+    if (_PyObject_GC_IS_TRACKED(op)) {
+        _PyObject_GC_UNTRACK(op);
+    }
+}
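Editor's note: the usual pairing in an extension type's deallocator, sketched for a hypothetical container type MyObject. Untrack first, so the collector never traverses a half-torn-down object, then release the memory with PyObject_GC_Del():

    static void
    mytype_dealloc(MyObject *self)
    {
        PyObject_GC_UnTrack(self);   // safe even if already untracked (see above)
        Py_CLEAR(self->item);        // drop references while invisible to the GC
        PyObject_GC_Del(self);
    }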
+
+int
+PyObject_IS_GC(PyObject *obj)
+{
+    return _PyObject_IS_GC(obj);
+}
+
+void
+_Py_ScheduleGC(PyInterpreterState *interp)
+{
+    _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
+}
+
+void
+_PyObject_GC_Link(PyObject *op)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    GCState *gcstate = &tstate->interp->gc;
+    gcstate->generations[0].count++; /* number of allocated GC objects */
+    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
+        gcstate->enabled &&
+        gcstate->generations[0].threshold &&
+        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
+        !_PyErr_Occurred(tstate))
+    {
+        _Py_ScheduleGC(tstate->interp);
+    }
+}
+
+void
+_Py_RunGC(PyThreadState *tstate)
+{
+    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
+}
+
+static PyObject *
+gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (basicsize > PY_SSIZE_T_MAX - presize) {
+        return _PyErr_NoMemory(tstate);
+    }
+    size_t size = presize + basicsize;
+    char *mem = _PyObject_MallocWithType(tp, size);
+    if (mem == NULL) {
+        return _PyErr_NoMemory(tstate);
+    }
+    ((PyObject **)mem)[0] = NULL;
+    ((PyObject **)mem)[1] = NULL;
+    PyObject *op = (PyObject *)(mem + presize);
+    _PyObject_GC_Link(op);
+    return op;
+}
+
+PyObject *
+_PyObject_GC_New(PyTypeObject *tp)
+{
+    size_t presize = _PyType_PreHeaderSize(tp);
+    PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp), presize);
+    if (op == NULL) {
+        return NULL;
+    }
+    _PyObject_Init(op, tp);
+    return op;
+}
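Editor's note: extension code reaches this through the public PyObject_GC_New() macro. A hedged sketch for the same hypothetical MyObject type, showing that tracking should start only once every field that tp_traverse visits holds a valid value:

    MyObject *self = PyObject_GC_New(MyObject, &MyType);  // MyType is hypothetical
    if (self == NULL) {
        return NULL;
    }
    self->item = NULL;                     // initialize traversable fields first
    PyObject_GC_Track((PyObject *)self);   // now the collector may traverse it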
+
+PyVarObject *
+_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
+{
+    PyVarObject *op;
+
+    if (nitems < 0) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    size_t presize = _PyType_PreHeaderSize(tp);
+    size_t size = _PyObject_VAR_SIZE(tp, nitems);
+    op = (PyVarObject *)gc_alloc(tp, size, presize);
+    if (op == NULL) {
+        return NULL;
+    }
+    _PyObject_InitVar(op, tp, nitems);
+    return op;
+}
+
+PyObject *
+PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
+{
+    size_t presize = _PyType_PreHeaderSize(tp);
+    PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp) + extra_size, presize);
+    if (op == NULL) {
+        return NULL;
+    }
+    memset(op, 0, _PyObject_SIZE(tp) + extra_size);
+    _PyObject_Init(op, tp);
+    return op;
+}
+
+PyVarObject *
+_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
+{
+    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
+    const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
+    _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
+    if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
+        return (PyVarObject *)PyErr_NoMemory();
+    }
+    char *mem = (char *)op - presize;
+    mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
+    if (mem == NULL) {
+        return (PyVarObject *)PyErr_NoMemory();
+    }
+    op = (PyVarObject *) (mem + presize);
+    Py_SET_SIZE(op, nitems);
+    return op;
+}
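Editor's note: the assert above means callers must resize before tracking (or after untracking). Through the public PyObject_GC_Resize() macro, for a hypothetical variable-size MyVarObject with a hypothetical new_nitems:

    MyVarObject *resized = PyObject_GC_Resize(MyVarObject, op, new_nitems);
    if (resized == NULL) {
        return NULL;   // MemoryError is set
    }
    op = resized;      // the object may have moved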
+
+void
+PyObject_GC_Del(void *op)
+{
+    size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
+    if (_PyObject_GC_IS_TRACKED(op)) {
+#ifdef Py_DEBUG
+        PyObject *exc = PyErr_GetRaisedException();
+        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
+                                     "gc", NULL, "Object of type %s is not untracked before destruction",
+                                     ((PyObject*)op)->ob_type->tp_name)) {
+            PyErr_WriteUnraisable(NULL);
+        }
+        PyErr_SetRaisedException(exc);
+#endif
+    }
+    GCState *gcstate = get_gc_state();
+    if (gcstate->generations[0].count > 0) {
+        gcstate->generations[0].count--;
+    }
+    PyObject_Free(((char *)op)-presize);
+}
+
+int
+PyObject_GC_IsTracked(PyObject* obj)
+{
+    if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
+        return 1;
+    }
+    return 0;
+}
+
+int
+PyObject_GC_IsFinalized(PyObject *obj)
+{
+    if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
+         return 1;
+    }
+    return 0;
+}
+
+struct custom_visitor_args {
+    struct visitor_args base;
+    gcvisitobjects_t callback;
+    void *arg;
+};
+
+static bool
+custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
+                       void *block, size_t block_size, void *args)
+{
+    PyObject *op = op_from_block(block, args, false);
+    if (op == NULL) {
+        return true;
+    }
+
+    struct custom_visitor_args *wrapper = (struct custom_visitor_args *)args;
+    if (!wrapper->callback(op, wrapper->arg)) {
+        return false;
+    }
+
+    return true;
+}
+
+void
+PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct custom_visitor_args wrapper = {
+        .callback = callback,
+        .arg = arg,
+    };
+
+    _PyEval_StopTheWorld(interp);
+    gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base);
+    _PyEval_StartTheWorld(interp);
+}
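Editor's note: PyUnstable_GC_VisitObjects() is the public (unstable) entry point; the callback returns non-zero to keep visiting and zero to stop early. A small counter as an example (assumes Python.h and stdio.h are included):

    static int
    count_object(PyObject *op, void *arg)
    {
        (void)op;
        ++*(Py_ssize_t *)arg;
        return 1;               // non-zero: continue the traversal
    }

    static void
    report_heap_size(void)
    {
        Py_ssize_t n = 0;
        PyUnstable_GC_VisitObjects(count_object, &n);
        printf("%zd GC objects on the heap\n", n);
    }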
+
 /* Clear all free lists
  * All free lists are cleared during the collection of the highest generation.
  * Allocated items in the free list may keep a pymalloc arena occupied.
@@ -25,4 +1720,4 @@ _PyGC_ClearAllFreeLists(PyInterpreterState *interp)
     HEAD_UNLOCK(&_PyRuntime);
 }
 
-#endif
+#endif  // Py_GIL_DISABLED
diff --git a/Python/object_stack.c b/Python/object_stack.c
new file mode 100644 (file)
index 0000000..66b37bc
--- /dev/null
+++ b/Python/object_stack.c
@@ -0,0 +1,87 @@
+// Stack of Python objects
+
+#include "Python.h"
+#include "pycore_freelist.h"
+#include "pycore_pystate.h"
+#include "pycore_object_stack.h"
+
+extern _PyObjectStackChunk *_PyObjectStackChunk_New(void);
+extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *);
+
+static struct _Py_object_stack_state *
+get_state(void)
+{
+    _PyFreeListState *state = _PyFreeListState_GET();
+    return &state->object_stack_state;
+}
+
+_PyObjectStackChunk *
+_PyObjectStackChunk_New(void)
+{
+    _PyObjectStackChunk *buf;
+    struct _Py_object_stack_state *state = get_state();
+    if (state->numfree > 0) {
+        buf = state->free_list;
+        state->free_list = buf->prev;
+        state->numfree--;
+    }
+    else {
+        // NOTE: we use PyMem_RawMalloc() here because this is used by the GC
+        // during mimalloc heap traversal. In that context, it is not safe to
+        // allocate mimalloc memory, such as via PyMem_Malloc().
+        buf = PyMem_RawMalloc(sizeof(_PyObjectStackChunk));
+        if (buf == NULL) {
+            return NULL;
+        }
+    }
+    buf->prev = NULL;
+    buf->n = 0;
+    return buf;
+}
+
+void
+_PyObjectStackChunk_Free(_PyObjectStackChunk *buf)
+{
+    assert(buf->n == 0);
+    struct _Py_object_stack_state *state = get_state();
+    // numfree == -1 means the free list is disabled (see
+    // _PyObjectStackChunk_ClearFreeList()); free the chunk immediately.
+    if (state->numfree >= 0 &&
+        state->numfree < _PyObjectStackChunk_MAXFREELIST)
+    {
+        buf->prev = state->free_list;
+        state->free_list = buf;
+        state->numfree++;
+    }
+    else {
+        PyMem_RawFree(buf);
+    }
+}
+
+void
+_PyObjectStack_Clear(_PyObjectStack *queue)
+{
+    while (queue->head != NULL) {
+        _PyObjectStackChunk *buf = queue->head;
+        buf->n = 0;
+        queue->head = buf->prev;
+        _PyObjectStackChunk_Free(buf);
+    }
+}
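Editor's note: a hedged sketch of how the GC is expected to use these stacks, assuming the inline _PyObjectStack_Push()/_PyObjectStack_Pop() helpers declared in pycore_object_stack.h (not part of this hunk), where push returns -1 if a new chunk cannot be allocated:

    _PyObjectStack to_visit = { NULL };
    if (_PyObjectStack_Push(&to_visit, op) < 0) {
        return -1;                      // chunk allocation failed
    }
    PyObject *item;
    while ((item = _PyObjectStack_Pop(&to_visit)) != NULL) {
        // ... mark or traverse item ...
    }
    _PyObjectStack_Clear(&to_visit);    // release any remaining chunks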
+
+void
+_PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalization)
+{
+    if (!is_finalization) {
+        // Ignore requests to clear the free list during GC. We use object
+        // stacks during GC, so emptying the free-list is counterproductive.
+        return;
+    }
+
+    struct _Py_object_stack_state *state = &free_lists->object_stack_state;
+    while (state->numfree > 0) {
+        _PyObjectStackChunk *buf = state->free_list;
+        state->free_list = buf->prev;
+        state->numfree--;
+        PyMem_RawFree(buf);
+    }
+    state->numfree = -1;  // disable the free list; no more caching after finalization
+}
index c9b521351444a712d7d7872a2b0498040ac65c59..5ad5b6f3fcc634596cecfca0cbe5d5ee368754a4 100644 (file)
@@ -10,6 +10,7 @@
 #include "pycore_frame.h"
 #include "pycore_initconfig.h"    // _PyStatus_OK()
 #include "pycore_object.h"        // _PyType_InitCache()
+#include "pycore_object_stack.h"  // _PyObjectStackChunk_ClearFreeList()
 #include "pycore_parking_lot.h"   // _PyParkingLot_AfterFork()
 #include "pycore_pyerrors.h"      // _PyErr_Clear()
 #include "pycore_pylifecycle.h"   // _PyAST_Fini()
@@ -1468,6 +1469,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization)
     _PyList_ClearFreeList(state, is_finalization);
     _PyContext_ClearFreeList(state, is_finalization);
     _PyAsyncGen_ClearFreeLists(state, is_finalization);
+    _PyObjectStackChunk_ClearFreeList(state, is_finalization);
 }
 
 void
@@ -2055,7 +2057,6 @@ start_the_world(struct _stoptheworld_state *stw)
     HEAD_LOCK(runtime);
     stw->requested = 0;
     stw->world_stopped = 0;
-    stw->requester = NULL;
     // Switch threads back to the detached state.
     PyInterpreterState *i;
     PyThreadState *t;
@@ -2066,6 +2067,7 @@ start_the_world(struct _stoptheworld_state *stw)
             _PyParkingLot_UnparkAll(&t->state);
         }
     }
+    stw->requester = NULL;
     HEAD_UNLOCK(runtime);
     if (stw->is_global) {
         _PyRWMutex_Unlock(&runtime->stoptheworld_mutex);