git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-126491: Lower heap size limit with faster marking (GH-127519)
authorMark Shannon <mark@hotpy.org>
Fri, 6 Dec 2024 10:46:59 +0000 (10:46 +0000)
committerGitHub <noreply@github.com>
Fri, 6 Dec 2024 10:46:59 +0000 (10:46 +0000)
* Faster marking of reachable objects

* Changes calculation of work to do and work done.

* Merges transitive closure calculations

InternalDocs/garbage_collector.md
Lib/test/test_gc.py
Objects/dictobject.c
Objects/genobject.c
Objects/typeobject.c
Python/gc.c

index 08db080a200ea4d8fa0770b2df01c00df6c0faf6..4761f78f3593e376cd3c558d9382a28b554f5c2e 100644 (file)
@@ -199,22 +199,22 @@ unreachable:
 
 ```pycon
 >>> import gc
->>> 
+>>>
 >>> class Link:
 ...    def __init__(self, next_link=None):
 ...        self.next_link = next_link
-...  
+...
 >>> link_3 = Link()
 >>> link_2 = Link(link_3)
 >>> link_1 = Link(link_2)
 >>> link_3.next_link = link_1
 >>> A = link_1
 >>> del link_1, link_2, link_3
->>> 
+>>>
 >>> link_4 = Link()
 >>> link_4.next_link = link_4
 >>> del link_4
->>> 
+>>>
 >>> # Collect the unreachable Link object (and its .__dict__ dict).
 >>> gc.collect()
 2
@@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
 >>> # Create a reference cycle.
 >>> x = MyObj()
 >>> x.self = x
->>> 
+>>>
 >>> # Initially the object is in the young generation.
 >>> gc.get_objects(generation=0)
 [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
->>> 
+>>>
 >>> # After a collection of the youngest generation the object
 >>> # moves to the old generation.
 >>> gc.collect(generation=0)
@@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are
 added to the working set.
 Then the above algorithm is repeated, starting from step 2.
 
+Determining how much work to do
+-------------------------------
+
+We need to do a certain amount of work to ensure that garbage is collected,
+but doing too much work slows down execution.
+
+To work out how much work we need to do, consider a heap with `L` live objects
+and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects
+at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and
+we don't want too much garbage (say, at most 1/3 of the total heap), `G0 ≤ L/2`.
+For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which
+`G1` garbage objects are created.
+
+The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`,
+assuming that the number of live objects remains roughly constant.
+If we set `T == 4*N` we get `T ≥ 4*G1`, and since `T == L + G0 + G1` this gives `L + G0 ≥ 3*G1`.
+For a steady state heap (`G0 == G1`) we get `L ≥ 2*G0` and the desired garbage ratio.
+
+In other words, to keep the garbage fraction to 1/3 or less we need to visit
+4 times as many objects as are newly created.
+
+We can do better than this though. Not all new objects will be garbage.
+Consider the heap at the end of the scavenge with `L1` live objects and `G1`
+garbage. Also, note that `T == M + I` where `M` is the number of objects marked
+as reachable and `I` is the number of objects visited in increments.
+Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`.
+
+If we choose the amount of work done such that `2*M + I == 6N` then we can do
+less work in most cases, but are still guaranteed to keep up.
+Since `I ≳ G0 + G1` (not strictly true, but close enough),
+`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G` (where `G == G0 == G1`
+in the steady state), so we can keep up.
+
+The reason that this improves performance is that `M` is usually much larger
+than `I`. If `M == 10I`, then `T ≅ 3N`.
+
+Finally, instead of using a fixed multiple of 8, we gradually increase it as the
+heap grows. This avoids wasting work for small heaps and during startup.
+
 
 Optimization: reusing fields to save memory
 ===========================================
index b5140057a69d36f9d659dee9f4805a8bd3291696..baf8e95dffdfced2f844b25c397858b853ebc567 100644 (file)
@@ -1161,27 +1161,19 @@ class IncrementalGCTests(unittest.TestCase):
             return head
 
         head = make_ll(1000)
-        count = 1000
-
-        # There will be some objects we aren't counting,
-        # e.g. the gc stats dicts. This test checks
-        # that the counts don't grow, so we try to
-        # correct for the uncounted objects
-        # This is just an estimate.
-        CORRECTION = 20
 
         enabled = gc.isenabled()
         gc.enable()
         olds = []
         initial_heap_size = _testinternalcapi.get_tracked_heap_size()
-        for i in range(20_000):
+        iterations = max(20_000, initial_heap_size)
+        for i in range(iterations):
             newhead = make_ll(20)
-            count += 20
             newhead.surprise = head
             olds.append(newhead)
             if len(olds) == 20:
                 new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
-                self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
+                self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations")
                 del olds[:]
         if not enabled:
             gc.disable()
index 1c9f86438dadc3e4c7f2e8d3288d7de986560aeb..de518b8dc5024bd9e34ebd3995cb720577bff6bd 100644 (file)
@@ -7064,9 +7064,7 @@ int
 PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
 {
     PyTypeObject *tp = Py_TYPE(obj);
-    if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
-        return 0;
-    }
+    assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
     if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
         PyDictValues *values = _PyObject_InlineValues(obj);
         if (values->valid) {
index e87f199c2504ba67863de93916df4548127aade7..33679afecb420fd1ed4ccf0654db452eb96ec087 100644 (file)
@@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = {
     gen_methods,                                /* tp_methods */
     gen_memberlist,                             /* tp_members */
     gen_getsetlist,                             /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 static PyObject *
@@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = {
     coro_methods,                               /* tp_methods */
     coro_memberlist,                            /* tp_members */
     coro_getsetlist,                            /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 static void
@@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue {
     (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \
      _Py_CAST(_PyAsyncGenWrappedValue*, (op)))
 
-
 static int
 async_gen_traverse(PyObject *self, visitproc visit, void *arg)
 {
@@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = {
     async_gen_methods,                          /* tp_methods */
     async_gen_memberlist,                       /* tp_members */
     async_gen_getsetlist,                       /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 
@@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = {
     PyObject_SelfIter,                          /* tp_iter */
     async_gen_asend_iternext,                   /* tp_iternext */
     async_gen_asend_methods,                    /* tp_methods */
-    0,                                          /* tp_members */
-    0,                                          /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
     .tp_finalize = async_gen_asend_finalize,
 };
 
index 2068d6aa9be52b608c304df742fb2b40e0e57e4b..cc95b9857e3f2d68b6f4742ce3ad16b030479e5d 100644 (file)
@@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
+
+static int
+plain_object_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    PyTypeObject *type = Py_TYPE(self);
+    assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+    Py_VISIT(type);
+    return PyObject_VisitManagedDict(self, visit, arg);
+}
+
 static void
 clear_slots(PyTypeObject *type, PyObject *self)
 {
@@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type)
         assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
         type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
         type->tp_dictoffset = -1;
+        if (type->tp_basicsize == sizeof(PyObject)) {
+            type->tp_traverse = plain_object_traverse;
+        }
     }
 
     type->tp_basicsize = slotoffset;
index 5b9588c8741b9771d1c5d1ac747d9df6100f1786..fd29a48518e71b43c7820adb860afafbe1398ddb 100644 (file)
@@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space)
  * space faster than objects are added to the old space.
  *
  * Each young or incremental collection adds a number of
- * objects, S (for survivors) to the old space, and
- * incremental collectors scan I objects from the old space.
- * I > S must be true. We also want I > S * N to be where
- * N > 1. Higher values of N mean that the old space is
+ * new objects (N) to the heap, and incremental collectors
+ * scan I objects from the old space.
+ * I > N must be true. We also want I > N * K, where
+ * K > 2. Higher values of K mean that the old space is
  * scanned more rapidly.
- * The default incremental threshold of 10 translates to
- * N == 1.4 (1 + 4/threshold)
  */
-
-/* Divide by 10, so that the default incremental threshold of 10
- * scans objects at 1% of the heap size */
-#define SCAN_RATE_DIVISOR 10
+#define SCAN_RATE_DIVISOR 5
 
 static void
 add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
@@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate,
     validate_spaces(gcstate);
 }
 
-#ifndef NDEBUG
-static inline int
-IS_IN_VISITED(PyGC_Head *gc, int visited_space)
+typedef struct work_stack {
+    PyGC_Head *top;
+    int visited_space;
+} WorkStack;
+
+/* Remove gc from the list it is currently in and push it to the stack */
+static inline void
+push_to_stack(PyGC_Head *gc, WorkStack *stack)
 {
-    assert(visited_space == 0 || other_space(visited_space) == 0);
-    return gc_old_space(gc) == visited_space;
+    PyGC_Head *prev = GC_PREV(gc);
+    PyGC_Head *next = GC_NEXT(gc);
+    _PyGCHead_SET_NEXT(prev, next);
+    _PyGCHead_SET_PREV(next, prev);
+    _PyGCHead_SET_PREV(gc, stack->top);
+    stack->top = gc;
 }
-#endif
 
-struct container_and_flag {
-    PyGC_Head *container;
-    int visited_space;
-    intptr_t size;
-};
+static inline PyGC_Head *
+pop_from_stack(WorkStack *stack)
+{
+    PyGC_Head *gc = stack->top;
+    stack->top = _PyGCHead_PREV(gc);
+    return gc;
+}
 
-/* A traversal callback for adding to container) */
-static int
-visit_add_to_container(PyObject *op, void *arg)
+/* append list `from` to `stack`; `from` becomes an empty list */
+static void
+move_list_to_stack(PyGC_Head *from, WorkStack *stack)
 {
-    OBJECT_STAT_INC(object_visits);
-    struct container_and_flag *cf = (struct container_and_flag *)arg;
-    int visited = cf->visited_space;
-    assert(visited == get_gc_state()->visited_space);
-    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+    if (!gc_list_is_empty(from)) {
+        PyGC_Head *from_head = GC_NEXT(from);
+        PyGC_Head *from_tail = GC_PREV(from);
+        _PyGCHead_SET_PREV(from_head, stack->top);
+        stack->top = from_tail;
+        gc_list_init(from);
+    }
+}
+
+static inline void
+move_to_stack(PyObject *op, WorkStack *stack, int visited_space)
+{
+    assert(op != NULL);
+    if (_PyObject_IS_GC(op)) {
         PyGC_Head *gc = AS_GC(op);
         if (_PyObject_GC_IS_TRACKED(op) &&
-            gc_old_space(gc) != visited) {
+            gc_old_space(gc) != visited_space) {
+            assert(!_Py_IsImmortal(op));
             gc_flip_old_space(gc);
-            gc_list_move(gc, cf->container);
-            cf->size++;
+            push_to_stack(gc, stack);
         }
     }
-    return 0;
 }
 
-static intptr_t
-expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
-{
-    struct container_and_flag arg = {
-        .container = container,
-        .visited_space = gcstate->visited_space,
-        .size = 0
-    };
-    assert(GC_NEXT(gc) == container);
-    while (gc != container) {
-        /* Survivors will be moved to visited space, so they should
-         * have been marked as visited */
-        assert(IS_IN_VISITED(gc, gcstate->visited_space));
-        PyObject *op = FROM_GC(gc);
-        assert(_PyObject_GC_IS_TRACKED(op));
-        if (_Py_IsImmortal(op)) {
-            PyGC_Head *next = GC_NEXT(gc);
-            gc_list_move(gc, &get_gc_state()->permanent_generation.head);
-            gc = next;
-            continue;
-        }
-        traverseproc traverse = Py_TYPE(op)->tp_traverse;
-        (void) traverse(op,
-                        visit_add_to_container,
-                        &arg);
-        gc = GC_NEXT(gc);
-    }
-    return arg.size;
+static void
+move_unvisited(PyObject *op, WorkStack *stack, int visited_space)
+{
+    move_to_stack(op, stack, visited_space);
+}
+
+#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V))
+
+/* A traversal callback for adding to container */
+static int
+visit_add_to_container(PyObject *op, void *arg)
+{
+    OBJECT_STAT_INC(object_visits);
+    WorkStack *stack = (WorkStack *)arg;
+    assert(stack->visited_space == get_gc_state()->visited_space);
+    move_to_stack(op, stack, stack->visited_space);
+    return 0;
 }
 
 /* Do bookkeeping for a completed GC cycle */
@@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate)
         gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
     }
     assert(gc_list_is_empty(&gcstate->old[visited].head));
-    gcstate->work_to_do = 0;
     gcstate->phase = GC_PHASE_MARK;
 }
 
-static intptr_t
-move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
-{
-    if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
-        PyGC_Head *gc = AS_GC(op);
-        if (_PyObject_GC_IS_TRACKED(op) &&
-            gc_old_space(gc) != visited_space) {
-            gc_flip_old_space(gc);
-            gc_list_move(gc, reachable);
-            return 1;
+static void
+frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space)
+{
+    _PyStackRef *locals = frame->localsplus;
+    _PyStackRef *sp = frame->stackpointer;
+    if (frame->f_locals != NULL) {
+        move_unvisited(frame->f_locals, stack, visited_space);
+    }
+    PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+    move_unvisited(func, stack, visited_space);
+    while (sp > locals) {
+        sp--;
+        _PyStackRef ref = *sp;
+        if (!PyStackRef_IsNull(ref)) {
+            PyObject *op = PyStackRef_AsPyObjectBorrow(ref);
+            if (!_Py_IsImmortal(op)) {
+                move_unvisited(op, stack, visited_space);
+            }
         }
     }
-    return 0;
 }
 
-static intptr_t
-mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
+static Py_ssize_t
+move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space)
 {
     // Transitively traverse all objects from reachable, until empty
-    struct container_and_flag arg = {
-        .container = reachable,
-        .visited_space = visited_space,
-        .size = 0
-    };
-    while (!gc_list_is_empty(reachable)) {
-        PyGC_Head *gc = _PyGCHead_NEXT(reachable);
+    Py_ssize_t objects_marked = 0;
+    while (stack->top != NULL) {
+        PyGC_Head *gc = pop_from_stack(stack);
         assert(gc_old_space(gc) == visited_space);
-        gc_list_move(gc, visited);
+        gc_list_append(gc, visited);
+        objects_marked++;
         PyObject *op = FROM_GC(gc);
-        traverseproc traverse = Py_TYPE(op)->tp_traverse;
-        (void) traverse(op,
-                        visit_add_to_container,
-                        &arg);
+        assert(PyObject_IS_GC(op));
+        assert(_PyObject_GC_IS_TRACKED(op));
+        if (_Py_IsImmortal(op)) {
+            _PyObject_GC_UNTRACK(op);
+        }
+        else {
+            traverseproc traverse = Py_TYPE(op)->tp_traverse;
+            (void) traverse(op, visit_add_to_container, stack);
+        }
     }
     gc_list_validate_space(visited, visited_space);
-    return arg.size;
+    return objects_marked;
 }
 
 static intptr_t
 mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
 {
-    PyGC_Head reachable;
-    gc_list_init(&reachable);
-    Py_ssize_t objects_marked = 0;
+    WorkStack stack;
+    stack.top = NULL;
+    stack.visited_space = visited_space;
     // Move all objects on stacks to reachable
     _PyRuntimeState *runtime = &_PyRuntime;
     HEAD_LOCK(runtime);
@@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b
                 frame = frame->previous;
                 continue;
             }
-            _PyStackRef *locals = frame->localsplus;
-            _PyStackRef *sp = frame->stackpointer;
-            objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
-            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
-            objects_marked += move_to_reachable(func, &reachable, visited_space);
-            while (sp > locals) {
-                sp--;
-                if (PyStackRef_IsNull(*sp)) {
-                    continue;
-                }
-                PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
-                if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
-                    PyGC_Head *gc = AS_GC(op);
-                    if (_PyObject_GC_IS_TRACKED(op) &&
-                        gc_old_space(gc) != visited_space) {
-                        gc_flip_old_space(gc);
-                        objects_marked++;
-                        gc_list_move(gc, &reachable);
-                    }
-                }
-            }
+            frame_move_unvisited(frame, &stack, visited_space);
             if (!start && frame->visited) {
                 // If this frame has already been visited, then the lower frames
                 // will have already been visited and will not have changed
@@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b
         ts = PyThreadState_Next(ts);
         HEAD_UNLOCK(runtime);
     }
-    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
-    assert(gc_list_is_empty(&reachable));
+    Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
+    assert(stack.top == NULL);
     return objects_marked;
 }
 
 static intptr_t
 mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
 {
-    PyGC_Head reachable;
-    gc_list_init(&reachable);
-    Py_ssize_t objects_marked = 0;
-    objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
-    objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
-    objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
+    WorkStack stack;
+    stack.top = NULL;
+    stack.visited_space = visited_space;
+    MOVE_UNVISITED(interp->sysdict, &stack, visited_space);
+    MOVE_UNVISITED(interp->builtins, &stack, visited_space);
+    MOVE_UNVISITED(interp->dict, &stack, visited_space);
     struct types_state *types = &interp->types;
     for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
-        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
-        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
+         MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space);
+         MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space);
     }
     for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
-        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
-        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
+        MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space);
+        MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space);
     }
-    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
-    assert(gc_list_is_empty(&reachable));
+    Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
+    assert(stack.top == NULL);
     return objects_marked;
 }
 
@@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate)
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
     Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
     objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
-    gcstate->work_to_do -= objects_marked;
     gcstate->phase = GC_PHASE_COLLECT;
     validate_spaces(gcstate);
     return objects_marked;
 }
 
+
+/* See InternalDocs/garbage_collector.md for more details. */
+#define MAX_HEAP_PORTION_MULTIPLIER 5
+#define MARKING_PROGRESS_MULTIPLIER 2
+
 static intptr_t
 assess_work_to_do(GCState *gcstate)
 {
-    /* The amount of work we want to do depends on three things.
+    /* The amount of work we want to do depends on two things.
      * 1. The number of new objects created
-     * 2. The growth in heap size since the last collection
-     * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
-     *
-     * For a steady state heap, the amount of work to do is three times the number
-     * of new objects added to the heap. This ensures that we stay ahead in the
-     * worst case of all new objects being garbage.
-     *
-     * This could be improved by tracking survival rates, but it is still a
-     * large improvement on the non-marking approach.
+     * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects)
      */
     intptr_t scale_factor = gcstate->old[0].threshold;
     if (scale_factor < 2) {
         scale_factor = 2;
     }
     intptr_t new_objects = gcstate->young.count;
-    intptr_t max_heap_fraction = new_objects*3/2;
-    intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
-    if (heap_fraction > max_heap_fraction) {
-        heap_fraction = max_heap_fraction;
+    intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER;
+    intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
+    if (heap_portion > max_heap_portion) {
+        heap_portion = max_heap_portion;
     }
     gcstate->young.count = 0;
-    return new_objects + heap_fraction;
+    return new_objects + heap_portion;
 }
 
 static void
@@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     if (gcstate->phase == GC_PHASE_MARK) {
         Py_ssize_t objects_marked = mark_at_start(tstate);
         GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
-        gcstate->work_to_do -= objects_marked;
+        gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
         validate_spaces(gcstate);
         return;
     }
     PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
-    PyGC_Head increment;
-    gc_list_init(&increment);
-    int scale_factor = gcstate->old[0].threshold;
-    if (scale_factor < 2) {
-        scale_factor = 2;
-    }
     intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
     GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
-    gcstate->work_to_do -= objects_marked;
+    gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
     gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
-    gc_list_merge(&gcstate->young.head, &increment);
+    PyGC_Head increment;
+    gc_list_init(&increment);
+    WorkStack working;
+    working.top = 0;
+    working.visited_space = gcstate->visited_space;
+    move_list_to_stack(&gcstate->young.head, &working);
+    Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
     gc_list_validate_space(&increment, gcstate->visited_space);
-    Py_ssize_t increment_size = gc_list_size(&increment);
+    assert(working.top == NULL);
     while (increment_size < gcstate->work_to_do) {
         if (gc_list_is_empty(not_visited)) {
             break;
         }
         PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
-        gc_list_move(gc, &increment);
-        increment_size++;
-        assert(!_Py_IsImmortal(FROM_GC(gc)));
         gc_set_old_space(gc, gcstate->visited_space);
-        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
+        push_to_stack(gc, &working);
+        assert(!_Py_IsImmortal(FROM_GC(gc)));
+        increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
+        assert(working.top == NULL);
     }
+    assert(increment_size == gc_list_size(&increment));
     GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
     validate_list(&increment, collecting_clear_unreachable_clear);
     gc_list_validate_space(&increment, gcstate->visited_space);
@@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     gc_collect_region(tstate, &increment, &survivors, stats);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     gcstate->work_to_do -= increment_size;
 
     add_stats(gcstate, 1, stats);
@@ -1668,6 +1654,7 @@ gc_collect_full(PyThreadState *tstate,
     gcstate->old[0].count = 0;
     gcstate->old[1].count = 0;
     completed_scavenge(gcstate);
+    gcstate->work_to_do = -gcstate->young.threshold;
     _PyGC_ClearAllFreeLists(tstate->interp);
     validate_spaces(gcstate);
     add_stats(gcstate, 2, stats);