GH-108362: Incremental GC implementation (GH-108038)
author    Mark Shannon <mark@hotpy.org>
          Mon, 5 Feb 2024 18:28:51 +0000 (18:28 +0000)
committer GitHub <noreply@github.com>
          Mon, 5 Feb 2024 18:28:51 +0000 (18:28 +0000)
13 files changed:
Doc/whatsnew/3.13.rst
Include/internal/pycore_gc.h
Include/internal/pycore_object.h
Include/internal/pycore_runtime_init.h
Lib/test/test_gc.py
Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst [new file with mode: 0644]
Modules/gcmodule.c
Objects/object.c
Objects/structseq.c
Python/gc.c
Python/gc_free_threading.c
Python/import.c
Tools/gdb/libpython.py

index c25d41351c2f3cd96679d56ea9de134b901db108..0770e28d230b4b346ce09800d1f76fb033ff67dc 100644 (file)
@@ -92,6 +92,10 @@ Interpreter improvements:
 New Features
 ============
 
+* The cyclic garbage collector is now incremental.
+  This means that maximum pause times are reduced
+  by an order of magnitude or more for larger heaps.
+
 Improved Error Messages
 -----------------------
 
@@ -101,6 +105,13 @@ Improved Error Messages
   variables. See also :ref:`using-on-controlling-color`.
   (Contributed by Pablo Galindo Salgado in :gh:`112730`.)
 
+Incremental Garbage Collection
+------------------------------
+
+* The cyclic garbage collector is now incremental.
+  This means that maximum pause times are reduced
+  by an order of magnitude or more for larger heaps.
+
 Other Language Changes
 ======================
 
@@ -232,6 +243,29 @@ fractions
   sign handling, minimum width and grouping. (Contributed by Mark Dickinson
   in :gh:`111320`.)
 
+gc
+--
+* The cyclic garbage collector is now incremental, which changes the meanings
+  of the results of :meth:`gc.get_threshold` and :meth:`gc.set_threshold` as
+  well as :meth:`gc.get_count` and :meth:`gc.get_stats`.
+* :meth:`gc.get_threshold` returns a three-tuple for backwards compatibility.
+  The first value is the threshold for young collections, as before; the second
+  value determines the rate at which the old collection is scanned (the default
+  is 10, and higher values mean that the old collection is scanned more slowly).
+  The third value is meaningless and is always zero.
+* :meth:`gc.set_threshold` ignores any items after the second.
+* :meth:`gc.get_count` and :meth:`gc.get_stats`
+  return the same format of results as before.
+  The only difference is that instead of the results referring to
+  the young, aging and old generations, the results refer to the
+  young generation and the aging and collecting spaces of the old generation.
+
+In summary, code that attempted to manipulate the behavior of the cycle GC may
+not work as well as intended, but it is very unlikely to be harmful.
+All other code will work just fine.
+Users should avoid calling :meth:`gc.collect` unless their workload is episodic,
+but that has always been the case to some extent.
+
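
As a sketch of the expected behavior under the incremental collector (the exact counts may vary by build and workload):

    import gc

    # The third value of the tuple is kept only for backwards compatibility.
    print(gc.get_threshold())    # e.g. (2000, 10, 0)

    # Only the first two values of set_threshold() take effect now.
    gc.set_threshold(5000, 20, 999)
    print(gc.get_threshold())    # e.g. (5000, 20, 0)

    # get_count() reports the young generation and the aging and
    # collecting spaces of the old generation.
    print(gc.get_count())        # e.g. (12, 0, 0)
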
 glob
 ----
 
index ca1d9fdf5253b8dc99c1f96da01f239368a9237f..d2f5c69b45ee399c6f2daa2d241ef35fa45ca0a1 100644 (file)
@@ -71,11 +71,15 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) {
 
 /* Bit flags for _gc_prev */
 /* Bit 0 is set when tp_finalize is called */
-#define _PyGC_PREV_MASK_FINALIZED  (1)
+#define _PyGC_PREV_MASK_FINALIZED  1
 /* Bit 1 is set when the object is in generation which is GCed currently. */
-#define _PyGC_PREV_MASK_COLLECTING (2)
+#define _PyGC_PREV_MASK_COLLECTING 2
+
+/* Bit 0 is set if the object belongs to old space 1 */
+#define _PyGC_NEXT_MASK_OLD_SPACE_1    1
+
 /* The (N-2) most significant bits contain the real address. */
-#define _PyGC_PREV_SHIFT           (2)
+#define _PyGC_PREV_SHIFT           2
 #define _PyGC_PREV_MASK            (((uintptr_t) -1) << _PyGC_PREV_SHIFT)
 
 /* set for debugging information */
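
To illustrate the tagging scheme above, here is a hypothetical Python model using plain integers (not the C implementation): the low bits of each link word carry flags, and the accessors strip them when reading a pointer while preserving them on update, as the new _PyGCHead_SET_NEXT does.

    # Hypothetical model of the pointer-tagging scheme using plain ints.
    PREV_MASK_FINALIZED   = 1     # bit 0 of _gc_prev
    PREV_MASK_COLLECTING  = 2     # bit 1 of _gc_prev
    NEXT_MASK_OLD_SPACE_1 = 1     # bit 0 of _gc_next
    PREV_SHIFT = 2
    PREV_MASK = ~0 << PREV_SHIFT  # the high bits hold the real address

    def get_next(gc_next):
        # Like _PyGCHead_NEXT: mask out the flag bits to recover the pointer.
        return gc_next & PREV_MASK

    def set_next(gc_next, new_ptr):
        # Like the new _PyGCHead_SET_NEXT: keep the old-space flag bit.
        assert new_ptr & ~PREV_MASK == 0  # the two low bits of an aligned pointer are free
        return (gc_next & ~PREV_MASK) | new_ptr

    word = set_next(NEXT_MASK_OLD_SPACE_1, 0x1000)
    assert get_next(word) == 0x1000
    assert word & NEXT_MASK_OLD_SPACE_1   # the flag survives the update
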
@@ -101,11 +105,13 @@ typedef enum {
 // Lowest bit of _gc_next is used for flags only in GC.
 // But it is always 0 for normal code.
 static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) {
-    uintptr_t next = gc->_gc_next;
+    uintptr_t next = gc->_gc_next & _PyGC_PREV_MASK;
     return (PyGC_Head*)next;
 }
 static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) {
-    gc->_gc_next = (uintptr_t)next;
+    uintptr_t unext = (uintptr_t)next;
+    assert((unext & ~_PyGC_PREV_MASK) == 0);
+    gc->_gc_next = (gc->_gc_next & ~_PyGC_PREV_MASK) | unext;
 }
 
 // Lowest two bits of _gc_prev is used for _PyGC_PREV_MASK_* flags.
@@ -113,6 +119,7 @@ static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) {
     uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK);
     return (PyGC_Head*)prev;
 }
+
 static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
     uintptr_t uprev = (uintptr_t)prev;
     assert((uprev & ~_PyGC_PREV_MASK) == 0);
@@ -198,6 +205,13 @@ struct gc_generation {
                   generations */
 };
 
+struct gc_collection_stats {
+    /* number of collected objects */
+    Py_ssize_t collected;
+    /* total number of uncollectable objects (put into gc.garbage) */
+    Py_ssize_t uncollectable;
+};
+
 /* Running stats per generation */
 struct gc_generation_stats {
     /* total number of collections */
@@ -219,8 +233,8 @@ struct _gc_runtime_state {
     int enabled;
     int debug;
     /* linked lists of container objects */
-    struct gc_generation generations[NUM_GENERATIONS];
-    PyGC_Head *generation0;
+    struct gc_generation young;
+    struct gc_generation old[2];
     /* a permanent generation which won't be collected */
     struct gc_generation permanent_generation;
     struct gc_generation_stats generation_stats[NUM_GENERATIONS];
@@ -233,22 +247,20 @@ struct _gc_runtime_state {
     /* This is the number of objects that survived the last full
        collection. It approximates the number of long lived objects
        tracked by the GC.
-
        (by "full collection", we mean a collection of the oldest
        generation). */
     Py_ssize_t long_lived_total;
-    /* This is the number of objects that survived all "non-full"
-       collections, and are awaiting to undergo a full collection for
-       the first time. */
-    Py_ssize_t long_lived_pending;
+
+    Py_ssize_t work_to_do;
+    /* Which of the old spaces is the visited space */
+    int visited_space;
 };
 
 
 extern void _PyGC_InitState(struct _gc_runtime_state *);
 
-extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation,
-                                _PyGC_Reason reason);
-extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate);
+extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason);
+extern void _PyGC_CollectNoFail(PyThreadState *tstate);
 
 /* Freeze objects tracked by the GC and ignore them in future collections. */
 extern void _PyGC_Freeze(PyInterpreterState *interp);
index 34a83ea228e8b10e0f74a1362a39cfe02625ad1d..efa712c4a0b4588397d2250d1b1b3ca71e3fcfa0 100644 (file)
@@ -125,19 +125,7 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
 }
 #define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
 
-static inline void _Py_SetImmortal(PyObject *op)
-{
-    if (op) {
-#ifdef Py_GIL_DISABLED
-        op->ob_tid = _Py_UNOWNED_TID;
-        op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
-        op->ob_ref_shared = 0;
-#else
-        op->ob_refcnt = _Py_IMMORTAL_REFCNT;
-#endif
-    }
-}
-#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
+extern void _Py_SetImmortal(PyObject *op);
 
 // Makes an immortal object mortal again with the specified refcnt. Should only
 // be used during runtime finalization.
@@ -325,11 +313,12 @@ static inline void _PyObject_GC_TRACK(
                           filename, lineno, __func__);
 
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    PyGC_Head *generation0 = interp->gc.generation0;
+    PyGC_Head *generation0 = &interp->gc.young.head;
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
     _PyGCHead_SET_NEXT(gc, generation0);
+    assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0);
     generation0->_gc_prev = (uintptr_t)gc;
 #endif
 }
index 0a5c92bb84b524f25cd1221283a4e1efe209b97d..4370ad05bdc0584015501a0b3557f05753b3878d 100644 (file)
@@ -160,12 +160,12 @@ extern PyTypeObject _PyExc_MemoryError;
         }, \
         .gc = { \
             .enabled = 1, \
-            .generations = { \
-                /* .head is set in _PyGC_InitState(). */ \
-                { .threshold = 700, }, \
-                { .threshold = 10, }, \
+            .young = { .threshold = 2000, }, \
+            .old = { \
                 { .threshold = 10, }, \
+                { .threshold = 0, }, \
             }, \
+            .work_to_do = -5000, \
         }, \
         .object_state = _py_object_state_INIT(INTERP), \
         .dtoa = _dtoa_state_INIT(&(INTERP)), \
index b01f344cb14a1a77cace0c99692aa74c193ac7ec..0002852fce964384fe63fa39c149b04f7f3ab0a4 100644 (file)
@@ -383,19 +383,11 @@ class GCTests(unittest.TestCase):
         # each call to collect(N)
         x = []
         gc.collect(0)
-        # x is now in gen 1
+        # x is now in the old gen
         a, b, c = gc.get_count()
-        gc.collect(1)
-        # x is now in gen 2
-        d, e, f = gc.get_count()
-        gc.collect(2)
-        # x is now in gen 3
-        g, h, i = gc.get_count()
-        # We don't check a, d, g since their exact values depends on
+        # We don't check a since its exact value depends on
         # internal implementation details of the interpreter.
         self.assertEqual((b, c), (1, 0))
-        self.assertEqual((e, f), (0, 1))
-        self.assertEqual((h, i), (0, 0))
 
     def test_trashcan(self):
         class Ouch:
@@ -846,16 +838,6 @@ class GCTests(unittest.TestCase):
         self.assertFalse(
                 any(l is element for element in gc.get_objects(generation=2))
         )
-        gc.collect(generation=1)
-        self.assertFalse(
-                any(l is element for element in gc.get_objects(generation=0))
-        )
-        self.assertFalse(
-                any(l is element for element in  gc.get_objects(generation=1))
-        )
-        self.assertTrue(
-                any(l is element for element in gc.get_objects(generation=2))
-        )
         gc.collect(generation=2)
         self.assertFalse(
                 any(l is element for element in gc.get_objects(generation=0))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst
new file mode 100644 (file)
index 0000000..1fe4e0f
--- /dev/null
@@ -0,0 +1,15 @@
+Implements an incremental cyclic garbage collector. By collecting the old
+generation in increments, there is no need for a full heap scan. This can
+hugely reduce maximum pause time for programs with large heaps.
+
+Reduces the number of generations from three to two. The old generation is
+split into two spaces, "aging" and "collecting".
+
+Collection happens in two steps:
+
+* First, the young generation is scanned and the survivors moved to the
+  end of the aging space.
+* Then objects are taken from the collecting space, at such a rate that
+  all cycles are collected eventually. Those objects are then scanned and
+  the survivors moved to the end of the aging space. When the collecting
+  space becomes empty, the two spaces are swapped.
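
A toy model of the scheme described above (a hypothetical Python sketch; the real collector works on intrusive doubly linked C lists and frees unreachable cycles rather than keeping everything):

    class ToyGC:
        # Hypothetical sketch of the two-step scheme; names are illustrative.
        def __init__(self):
            self.young, self.aging, self.collecting = [], [], []

        def survivors_of(self, objs):
            # Stand-in for the real cycle detection: keep everything.
            return objs

        def collect_increment(self, work_to_do):
            # Step 1: scan the young generation; survivors move to the
            # end of the aging space.
            self.aging.extend(self.survivors_of(self.young))
            self.young.clear()
            # Step 2: take objects from the collecting space at a bounded
            # rate, so that all cycles are collected eventually.
            region = [self.collecting.pop(0)
                      for _ in range(min(work_to_do, len(self.collecting)))]
            self.aging.extend(self.survivors_of(region))
            # When the collecting space becomes empty, swap the spaces.
            if not self.collecting:
                self.aging, self.collecting = self.collecting, self.aging
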
index ffddef34ecce7a5e6383ac82a415260d0eeb2353..3b63dd7a9a83533b8b0db7aede7efdb0f4cc61bf 100644 (file)
@@ -158,17 +158,12 @@ gc_set_threshold_impl(PyObject *module, int threshold0, int group_right_1,
 {
     GCState *gcstate = get_gc_state();
 
-    gcstate->generations[0].threshold = threshold0;
+    gcstate->young.threshold = threshold0;
     if (group_right_1) {
-        gcstate->generations[1].threshold = threshold1;
+        gcstate->old[0].threshold = threshold1;
     }
     if (group_right_2) {
-        gcstate->generations[2].threshold = threshold2;
-
-        /* generations higher than 2 get the same threshold */
-        for (int i = 3; i < NUM_GENERATIONS; i++) {
-            gcstate->generations[i].threshold = gcstate->generations[2].threshold;
-        }
+        gcstate->old[1].threshold = threshold2;
     }
     Py_RETURN_NONE;
 }
@@ -185,9 +180,9 @@ gc_get_threshold_impl(PyObject *module)
 {
     GCState *gcstate = get_gc_state();
     return Py_BuildValue("(iii)",
-                         gcstate->generations[0].threshold,
-                         gcstate->generations[1].threshold,
-                         gcstate->generations[2].threshold);
+                         gcstate->young.threshold,
+                         gcstate->old[0].threshold,
+                         0);
 }
 
 /*[clinic input]
@@ -202,9 +197,9 @@ gc_get_count_impl(PyObject *module)
 {
     GCState *gcstate = get_gc_state();
     return Py_BuildValue("(iii)",
-                         gcstate->generations[0].count,
-                         gcstate->generations[1].count,
-                         gcstate->generations[2].count);
+                         gcstate->young.count,
+                         gcstate->old[gcstate->visited_space].count,
+                         gcstate->old[gcstate->visited_space^1].count);
 }
 
 /*[clinic input]
index bbf7f98ae3daf92e277a7d6c4fa9f4666df060ce..7247eb21df6b6eb92fd0dd1b7c545b551e77ca31 100644 (file)
@@ -2387,6 +2387,21 @@ _Py_NewReferenceNoTotal(PyObject *op)
     new_reference(op);
 }
 
+void
+_Py_SetImmortal(PyObject *op)
+{
+    if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) {
+        _PyObject_GC_UNTRACK(op);
+    }
+#ifdef Py_GIL_DISABLED
+    op->ob_tid = _Py_UNOWNED_TID;
+    op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
+    op->ob_ref_shared = 0;
+#else
+    op->ob_refcnt = _Py_IMMORTAL_REFCNT;
+#endif
+}
+
 void
 _Py_ResurrectReference(PyObject *op)
 {
index 581d6ad240885a0867debe8748757a34d66b885a..661d96a968fb809fbdd6f721234e3883d7eedf7c 100644 (file)
@@ -603,6 +603,9 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
                                        PyStructSequence_Desc *desc,
                                        unsigned long tp_flags)
 {
+    if (Py_TYPE(type) == NULL) {
+        Py_SET_TYPE(type, &PyType_Type);
+    }
     Py_ssize_t n_unnamed_members;
     Py_ssize_t n_members = count_members(desc, &n_unnamed_members);
     PyMemberDef *members = NULL;
@@ -618,7 +621,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
         }
         initialize_static_fields(type, desc, members, tp_flags);
 
-        _Py_SetImmortal(type);
+        _Py_SetImmortal((PyObject *)type);
     }
 #ifndef NDEBUG
     else {
index 466467602915264c845998ba4e9e321c6fd0ef43..cda12ff7fbc982f55e4b710f6c7ac63c3b4df922 100644 (file)
@@ -45,7 +45,7 @@ typedef struct _gc_runtime_state GCState;
 // move_legacy_finalizers() removes this flag instead.
 // Between them, unreachable list is not normal list and we can not use
 // most gc_list_* functions for it.
-#define NEXT_MASK_UNREACHABLE  (1)
+#define NEXT_MASK_UNREACHABLE  2
 
 #define AS_GC(op) _Py_AS_GC(op)
 #define FROM_GC(gc) _Py_FROM_GC(gc)
@@ -95,9 +95,48 @@ gc_decref(PyGC_Head *g)
     g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
 }
 
+static inline int
+gc_old_space(PyGC_Head *g)
+{
+    return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
 
-#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
+static inline int
+flip_old_space(int space)
+{
+    assert(space == 0 || space == 1);
+    return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
 
+static inline void
+gc_flip_old_space(PyGC_Head *g)
+{
+    g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
+
+static inline void
+gc_set_old_space(PyGC_Head *g, int space)
+{
+    assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1);
+    g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1;
+    g->_gc_next |= space;
+}
+
+static PyGC_Head *
+GEN_HEAD(GCState *gcstate, int n)
+{
+    assert((gcstate->visited_space & (~1)) == 0);
+    switch(n) {
+        case 0:
+            return &gcstate->young.head;
+        case 1:
+            return &gcstate->old[gcstate->visited_space].head;
+        case 2:
+            return &gcstate->old[gcstate->visited_space^1].head;
+        default:
+            Py_UNREACHABLE();
+    }
+}
 
 static GCState *
 get_gc_state(void)
@@ -116,11 +155,12 @@ _PyGC_InitState(GCState *gcstate)
         GEN.head._gc_prev = (uintptr_t)&GEN.head; \
     } while (0)
 
-    for (int i = 0; i < NUM_GENERATIONS; i++) {
-        assert(gcstate->generations[i].count == 0);
-        INIT_HEAD(gcstate->generations[i]);
-    };
-    gcstate->generation0 = GEN_HEAD(gcstate, 0);
+    assert(gcstate->young.count == 0);
+    assert(gcstate->old[0].count == 0);
+    assert(gcstate->old[1].count == 0);
+    INIT_HEAD(gcstate->young);
+    INIT_HEAD(gcstate->old[0]);
+    INIT_HEAD(gcstate->old[1]);
     INIT_HEAD(gcstate->permanent_generation);
 
 #undef INIT_HEAD
@@ -218,6 +258,7 @@ gc_list_is_empty(PyGC_Head *list)
 static inline void
 gc_list_append(PyGC_Head *node, PyGC_Head *list)
 {
+    assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0);
     PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
 
     // last <-> node
@@ -275,6 +316,8 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to)
         PyGC_Head *from_tail = GC_PREV(from);
         assert(from_head != from);
         assert(from_tail != from);
+        assert(gc_list_is_empty(to) ||
+            gc_old_space(to_tail) == gc_old_space(from_tail));
 
         _PyGCHead_SET_NEXT(to_tail, from_head);
         _PyGCHead_SET_PREV(from_head, to_tail);
@@ -343,8 +386,8 @@ enum flagstates {collecting_clear_unreachable_clear,
 static void
 validate_list(PyGC_Head *head, enum flagstates flags)
 {
-    assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
-    assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
+    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
+    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
     uintptr_t prev_value = 0, next_value = 0;
     switch (flags) {
         case collecting_clear_unreachable_clear:
@@ -366,7 +409,7 @@ validate_list(PyGC_Head *head, enum flagstates flags)
     PyGC_Head *gc = GC_NEXT(head);
     while (gc != head) {
         PyGC_Head *trueprev = GC_PREV(gc);
-        PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
+        PyGC_Head *truenext = GC_NEXT(gc);
         assert(truenext != NULL);
         assert(trueprev == prev);
         assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
@@ -376,8 +419,44 @@ validate_list(PyGC_Head *head, enum flagstates flags)
     }
     assert(prev == GC_PREV(head));
 }
+
+static void
+validate_old(GCState *gcstate)
+{
+    for (int space = 0; space < 2; space++) {
+        PyGC_Head *head = &gcstate->old[space].head;
+        PyGC_Head *gc = GC_NEXT(head);
+        while (gc != head) {
+            PyGC_Head *next = GC_NEXT(gc);
+            assert(gc_old_space(gc) == space);
+            gc = next;
+        }
+    }
+}
+
+static void
+validate_consistent_old_space(PyGC_Head *head)
+{
+    PyGC_Head *prev = head;
+    PyGC_Head *gc = GC_NEXT(head);
+    if (gc == head) {
+        return;
+    }
+    int old_space = gc_old_space(gc);
+    while (gc != head) {
+        PyGC_Head *truenext = GC_NEXT(gc);
+        assert(truenext != NULL);
+        assert(gc_old_space(gc) == old_space);
+        prev = gc;
+        gc = truenext;
+    }
+    assert(prev == GC_PREV(head));
+}
+
 #else
 #define validate_list(x, y) do{}while(0)
+#define validate_old(g) do{}while(0)
+#define validate_consistent_old_space(l) do{}while(0)
 #endif
 
 /*** end of list stuff ***/
@@ -394,15 +473,7 @@ update_refs(PyGC_Head *containers)
 
     while (gc != containers) {
         next = GC_NEXT(gc);
-        /* Move any object that might have become immortal to the
-         * permanent generation as the reference count is not accurately
-         * reflecting the actual number of live references to this object
-         */
-        if (_Py_IsImmortal(FROM_GC(gc))) {
-           gc_list_move(gc, &get_gc_state()->permanent_generation.head);
-           gc = next;
-           continue;
-        }
+        assert(!_Py_IsImmortal(FROM_GC(gc)));
         gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
         /* Python's cyclic gc should never see an incoming refcount
          * of 0:  if something decref'ed to 0, it should have been
@@ -500,12 +571,13 @@ visit_reachable(PyObject *op, void *arg)
         // Manually unlink gc from unreachable list because the list functions
         // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
         PyGC_Head *prev = GC_PREV(gc);
-        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
+        PyGC_Head *next = GC_NEXT(gc);
         _PyObject_ASSERT(FROM_GC(prev),
                          prev->_gc_next & NEXT_MASK_UNREACHABLE);
         _PyObject_ASSERT(FROM_GC(next),
                          next->_gc_next & NEXT_MASK_UNREACHABLE);
-        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
+        prev->_gc_next = gc->_gc_next;  // copy flag bits
+        gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
         _PyGCHead_SET_PREV(next, prev);
 
         gc_list_append(gc, reachable);
@@ -557,6 +629,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
      * or to the right have been scanned yet.
      */
 
+    validate_consistent_old_space(young);
+    /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
+    uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
     while (gc != young) {
         if (gc_get_refs(gc)) {
             /* gc is definitely reachable from outside the
@@ -602,17 +677,18 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
             // But this may pollute the unreachable list head's 'next' pointer
             // too. That's semantically senseless but expedient here - the
             // damage is repaired when this function ends.
-            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
+            last->_gc_next = flags | (uintptr_t)gc;
             _PyGCHead_SET_PREV(gc, last);
-            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
+            gc->_gc_next = flags | (uintptr_t)unreachable;
             unreachable->_gc_prev = (uintptr_t)gc;
         }
-        gc = (PyGC_Head*)prev->_gc_next;
+        gc = _PyGCHead_NEXT(prev);
     }
     // young->_gc_prev must be last element remained in the list.
     young->_gc_prev = (uintptr_t)prev;
+    young->_gc_next &= _PyGC_PREV_MASK;
     // don't let the pollution of the list head's next pointer leak
-    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
+    unreachable->_gc_next &= _PyGC_PREV_MASK;
 }
 
 static void
@@ -669,8 +745,8 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
         PyObject *op = FROM_GC(gc);
 
         _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
+        next = GC_NEXT(gc);
         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
-        next = (PyGC_Head*)gc->_gc_next;
 
         if (has_legacy_finalizer(op)) {
             gc_clear_collecting(gc);
@@ -689,8 +765,8 @@ clear_unreachable_mask(PyGC_Head *unreachable)
     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
         _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
+        next = GC_NEXT(gc);
         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
-        next = (PyGC_Head*)gc->_gc_next;
     }
     validate_list(unreachable, collecting_set_unreachable_clear);
 }
@@ -1023,25 +1099,6 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate,
 }
 
 
-// Show stats for objects in each generations
-static void
-show_stats_each_generations(GCState *gcstate)
-{
-    char buf[100];
-    size_t pos = 0;
-
-    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
-        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
-                             " %zd",
-                             gc_list_size(GEN_HEAD(gcstate, i)));
-    }
-
-    PySys_FormatStderr(
-        "gc: objects in each generation:%s\n"
-        "gc: objects in permanent generation: %zd\n",
-        buf, gc_list_size(&gcstate->permanent_generation.head));
-}
-
 /* Deduce which objects among "base" are unreachable from outside the list
    and move them to 'unreachable'. The process consist in the following steps:
 
@@ -1115,7 +1172,6 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
      * the reachable objects instead.  But this is a one-time cost, probably not
      * worth complicating the code to speed just a little.
      */
-    gc_list_init(unreachable);
     move_unreachable(base, unreachable);  // gc_prev is pointer again
     validate_list(base, collecting_clear_unreachable_clear);
     validate_list(unreachable, collecting_set_unreachable_set);
@@ -1154,219 +1210,272 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
 }
 
 
-/* Invoke progress callbacks to notify clients that garbage collection
- * is starting or stopping
- */
+#define UNTRACK_TUPLES 1
+#define UNTRACK_DICTS 2
+
 static void
-invoke_gc_callback(PyThreadState *tstate, const char *phase,
-                   int generation, Py_ssize_t collected,
-                   Py_ssize_t uncollectable)
-{
-    assert(!_PyErr_Occurred(tstate));
+gc_collect_region(PyThreadState *tstate,
+                  PyGC_Head *from,
+                  PyGC_Head *to,
+                  int untrack,
+                  struct gc_collection_stats *stats);
 
-    /* we may get called very early */
-    GCState *gcstate = &tstate->interp->gc;
-    if (gcstate->callbacks == NULL) {
-        return;
+static inline Py_ssize_t
+gc_list_set_space(PyGC_Head *list, int space)
+{
+    Py_ssize_t size = 0;
+    PyGC_Head *gc;
+    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
+        gc_set_old_space(gc, space);
+        size++;
     }
+    return size;
+}
 
-    /* The local variable cannot be rebound, check it for sanity */
-    assert(PyList_CheckExact(gcstate->callbacks));
-    PyObject *info = NULL;
-    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
-        info = Py_BuildValue("{sisnsn}",
-            "generation", generation,
-            "collected", collected,
-            "uncollectable", uncollectable);
-        if (info == NULL) {
-            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
-            return;
+
+static void
+add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
+{
+    gcstate->generation_stats[gen].collected += stats->collected;
+    gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
+    gcstate->generation_stats[gen].collections += 1;
+}
+
+
+/* Multiply by 4 so that the default incremental threshold of 10
+ * scans objects at 40% the rate that the young gen tenures them. */
+#define SCAN_RATE_MULTIPLIER 4
+
+
+static void
+gc_collect_young(PyThreadState *tstate,
+                 struct gc_collection_stats *stats)
+{
+    GCState *gcstate = &tstate->interp->gc;
+    PyGC_Head *young = &gcstate->young.head;
+    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+#ifdef Py_STATS
+    {
+        Py_ssize_t count = 0;
+        PyGC_Head *gc;
+        for (gc = GC_NEXT(young); gc != young; gc = GC_NEXT(gc)) {
+            count++;
         }
     }
+#endif
 
-    PyObject *phase_obj = PyUnicode_FromString(phase);
-    if (phase_obj == NULL) {
-        Py_XDECREF(info);
-        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
-        return;
+    PyGC_Head survivors;
+    gc_list_init(&survivors);
+    gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats);
+    Py_ssize_t survivor_count = 0;
+    if (gcstate->visited_space) {
+        /* objects in visited space have bit set, so we set it here */
+        survivor_count = gc_list_set_space(&survivors, 1);
     }
-
-    PyObject *stack[] = {phase_obj, info};
-    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
-        Py_INCREF(cb); /* make sure cb doesn't go away */
-        r = PyObject_Vectorcall(cb, stack, 2, NULL);
-        if (r == NULL) {
-            PyErr_WriteUnraisable(cb);
-        }
-        else {
-            Py_DECREF(r);
+    else {
+        PyGC_Head *gc;
+        for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) {
+#ifdef GC_DEBUG
+            assert(gc_old_space(gc) == 0);
+#endif
+            survivor_count++;
         }
-        Py_DECREF(cb);
     }
-    Py_DECREF(phase_obj);
-    Py_XDECREF(info);
-    assert(!_PyErr_Occurred(tstate));
+    gc_list_merge(&survivors, visited);
+    validate_old(gcstate);
+    gcstate->young.count = 0;
+    gcstate->old[gcstate->visited_space].count++;
+    Py_ssize_t scale_factor = gcstate->old[0].threshold;
+    if (scale_factor < 1) {
+        scale_factor = 1;
+    }
+    gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
+    add_stats(gcstate, 0, stats);
+}
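
The survivor accounting in gc_collect_young() can be sanity-checked with a little arithmetic (a sketch of the formula as written, using the default old-generation threshold of 10):

    SCAN_RATE_MULTIPLIER = 4

    def work_added(survivor_count, old_threshold=10):
        # Mirrors the update in gc_collect_young(): each young collection
        # adds work proportional to its survivors; raising the old
        # threshold makes the old spaces get scanned more slowly.
        scale_factor = max(old_threshold, 1)
        return survivor_count + survivor_count * SCAN_RATE_MULTIPLIER // scale_factor

    # With the default threshold of 10, tenuring 1000 survivors adds
    # 1400 units of work, so later increments scan roughly 1.4 old
    # objects for every newly tenured one.
    assert work_added(1000) == 1400
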
+
+static inline int
+is_in_visited(PyGC_Head *gc, int visited_space)
+{
+    assert(visited_space == 0 || flip_old_space(visited_space) == 0);
+    return gc_old_space(gc) == visited_space;
 }
 
+struct container_and_flag {
+    PyGC_Head *container;
+    int visited_space;
+};
 
-/* Find the oldest generation (highest numbered) where the count
- * exceeds the threshold.  Objects in the that generation and
- * generations younger than it will be collected. */
+/* A traversal callback for adding to container */
 static int
-gc_select_generation(GCState *gcstate)
-{
-    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
-            /* Avoid quadratic performance degradation in number
-               of tracked objects (see also issue #4074):
-
-               To limit the cost of garbage collection, there are two strategies;
-                 - make each collection faster, e.g. by scanning fewer objects
-                 - do less collections
-               This heuristic is about the latter strategy.
-
-               In addition to the various configurable thresholds, we only trigger a
-               full collection if the ratio
-
-                long_lived_pending / long_lived_total
-
-               is above a given value (hardwired to 25%).
-
-               The reason is that, while "non-full" collections (i.e., collections of
-               the young and middle generations) will always examine roughly the same
-               number of objects -- determined by the aforementioned thresholds --,
-               the cost of a full collection is proportional to the total number of
-               long-lived objects, which is virtually unbounded.
-
-               Indeed, it has been remarked that doing a full collection every
-               <constant number> of object creations entails a dramatic performance
-               degradation in workloads which consist in creating and storing lots of
-               long-lived objects (e.g. building a large list of GC-tracked objects would
-               show quadratic performance, instead of linear as expected: see issue #4074).
-
-               Using the above ratio, instead, yields amortized linear performance in
-               the total number of objects (the effect of which can be summarized
-               thusly: "each full garbage collection is more and more costly as the
-               number of objects grows, but we do fewer and fewer of them").
-
-               This heuristic was suggested by Martin von Löwis on python-dev in
-               June 2008. His original analysis and proposal can be found at:
-               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-            */
-            if (i == NUM_GENERATIONS - 1
-                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
-            {
-                continue;
-            }
-            return i;
+visit_add_to_container(PyObject *op, void *arg)
+{
+    OBJECT_STAT_INC(object_visits);
+    struct container_and_flag *cf = (struct container_and_flag *)arg;
+    int visited = cf->visited_space;
+    assert(visited == get_gc_state()->visited_space);
+    if (_PyObject_IS_GC(op)) {
+        PyGC_Head *gc = AS_GC(op);
+        if (_PyObject_GC_IS_TRACKED(op) &&
+            gc_old_space(gc) != visited) {
+            assert(!_Py_IsImmortal(op));
+            gc_flip_old_space(gc);
+            gc_list_move(gc, cf->container);
         }
     }
-    return -1;
+    return 0;
 }
 
-
-/* This is the main function.  Read this to understand how the
- * collection process works. */
-static Py_ssize_t
-gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+static uintptr_t
+expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
 {
-    int i;
-    Py_ssize_t m = 0; /* # objects collected */
-    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
-    PyGC_Head *young; /* the generation we are examining */
-    PyGC_Head *old; /* next older generation */
-    PyGC_Head unreachable; /* non-problematic unreachable trash */
-    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
-    PyGC_Head *gc;
-    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
-    GCState *gcstate = &tstate->interp->gc;
-
-    // gc_collect_main() must not be called before _PyGC_Init
-    // or after _PyGC_Fini()
-    assert(gcstate->garbage != NULL);
-    assert(!_PyErr_Occurred(tstate));
+    validate_list(container, collecting_clear_unreachable_clear);
+    struct container_and_flag arg = {
+        .container = container,
+        .visited_space = gcstate->visited_space,
+    };
+    uintptr_t size = 0;
+    assert(GC_NEXT(gc) == container);
+    while (gc != container) {
+        /* Survivors will be moved to visited space, so they should
+         * have been marked as visited */
+        assert(is_in_visited(gc, gcstate->visited_space));
+        PyObject *op = FROM_GC(gc);
+        if (_Py_IsImmortal(op)) {
+            PyGC_Head *next = GC_NEXT(gc);
+            gc_list_move(gc, &get_gc_state()->permanent_generation.head);
+            gc = next;
+            continue;
+        }
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        (void) traverse(op,
+                        visit_add_to_container,
+                        &arg);
+        gc = GC_NEXT(gc);
+        size++;
+    }
+    return size;
+}
 
-    int expected = 0;
-    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
-        // Don't start a garbage collection if one is already in progress.
-        return 0;
+/* Do bookkeeping for a completed GC cycle */
+static void
+completed_cycle(GCState *gcstate)
+{
+    assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head));
+    assert(gc_list_is_empty(&gcstate->young.head));
+    gcstate->visited_space = flip_old_space(gcstate->visited_space);
+    if (gcstate->work_to_do > 0) {
+        gcstate->work_to_do = 0;
     }
+}
 
-    if (generation == GENERATION_AUTO) {
-        // Select the oldest generation that needs collecting. We will collect
-        // objects from that generation and all generations younger than it.
-        generation = gc_select_generation(gcstate);
-        if (generation < 0) {
-            // No generation needs to be collected.
-            _Py_atomic_store_int(&gcstate->collecting, 0);
-            return 0;
+static void
+gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
+{
+    GCState *gcstate = &tstate->interp->gc;
+    if (gcstate->work_to_do <= 0) {
+        /* No work to do */
+        return;
+    }
+    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
+    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+    PyGC_Head increment;
+    gc_list_init(&increment);
+    if (gc_list_is_empty(not_visited)) {
+        completed_cycle(gcstate);
+        return;
+    }
+    Py_ssize_t region_size = 0;
+    while (region_size < gcstate->work_to_do) {
+        if (gc_list_is_empty(not_visited)) {
+            break;
         }
+        PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
+        gc_list_move(gc, &increment);
+        gc_set_old_space(gc, gcstate->visited_space);
+        region_size += expand_region_transitively_reachable(&increment, gc, gcstate);
     }
-
-    assert(generation >= 0 && generation < NUM_GENERATIONS);
-
-#ifdef Py_STATS
-    if (_Py_stats) {
-        _Py_stats->object_stats.object_visits = 0;
+    assert(region_size == gc_list_size(&increment));
+    PyGC_Head survivors;
+    gc_list_init(&survivors);
+    gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
+    gc_list_merge(&survivors, visited);
+    assert(gc_list_is_empty(&increment));
+    gcstate->work_to_do -= region_size;
+    validate_old(gcstate);
+    add_stats(gcstate, 1, stats);
+    if (gc_list_is_empty(not_visited)) {
+        completed_cycle(gcstate);
     }
-#endif
-    GC_STAT_ADD(generation, collections, 1);
+}
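
In outline, each increment pops objects off the pending (not-visited) space, pulls in everything transitively reachable from them so that no live cycle straddles the region boundary, and collects that region as a unit. A hypothetical Python model:

    class Node:
        def __init__(self, refs=()):
            self.refs = list(refs)

    def collect_one_increment(pending, visited, work_to_do, is_live):
        # Hypothetical model of gc_collect_increment(); the real code
        # works on intrusive C lists and uses tp_traverse.
        increment, size = [], 0
        while pending and size < work_to_do:
            work = [pending.pop(0)]
            # Expand transitively, like expand_region_transitively_reachable().
            while work:
                node = work.pop()
                increment.append(node)
                size += 1
                for ref in node.refs:
                    if ref in pending:
                        pending.remove(ref)
                        work.append(ref)
        # Collect the region; survivors move to the visited space.
        visited.extend(n for n in increment if is_live(n))
        return work_to_do - size
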
 
-    if (reason != _Py_GC_REASON_SHUTDOWN) {
-        invoke_gc_callback(tstate, "start", generation, 0, 0);
-    }
 
-    if (gcstate->debug & _PyGC_DEBUG_STATS) {
-        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
-        show_stats_each_generations(gcstate);
-        t1 = _PyTime_GetPerfCounter();
+static void
+gc_collect_full(PyThreadState *tstate,
+                struct gc_collection_stats *stats)
+{
+    GCState *gcstate = &tstate->interp->gc;
+    validate_old(gcstate);
+    PyGC_Head *young = &gcstate->young.head;
+    PyGC_Head *old0 = &gcstate->old[0].head;
+    PyGC_Head *old1 = &gcstate->old[1].head;
+    /* merge all generations into old0 */
+    gc_list_merge(young, old0);
+    gcstate->young.count = 0;
+    PyGC_Head *gc = GC_NEXT(old1);
+    while (gc != old1) {
+        PyGC_Head *next = GC_NEXT(gc);
+        gc_set_old_space(gc, 0);
+        gc = next;
     }
+    gc_list_merge(old1, old0);
 
-    if (PyDTrace_GC_START_ENABLED()) {
-        PyDTrace_GC_START(generation);
-    }
+    gc_collect_region(tstate, old0, old0,
+                      UNTRACK_TUPLES | UNTRACK_DICTS,
+                      stats);
+    gcstate->visited_space = 1;
+    gcstate->young.count = 0;
+    gcstate->old[0].count = 0;
+    gcstate->old[1].count = 0;
 
-    /* update collection and allocation counters */
-    if (generation+1 < NUM_GENERATIONS) {
-        gcstate->generations[generation+1].count += 1;
-    }
-    for (i = 0; i <= generation; i++) {
-        gcstate->generations[i].count = 0;
-    }
+    gcstate->work_to_do = - gcstate->young.threshold * 2;
 
-    /* merge younger generations with one we are currently collecting */
-    for (i = 0; i < generation; i++) {
-        gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
-    }
+    _PyGC_ClearAllFreeLists(tstate->interp);
+    validate_old(gcstate);
+    add_stats(gcstate, 2, stats);
+}
 
-    /* handy references */
-    young = GEN_HEAD(gcstate, generation);
-    if (generation < NUM_GENERATIONS-1) {
-        old = GEN_HEAD(gcstate, generation+1);
-    }
-    else {
-        old = young;
-    }
-    validate_list(old, collecting_clear_unreachable_clear);
+/* This is the main function. Read this to understand how the
+ * collection process works. */
+static void
+gc_collect_region(PyThreadState *tstate,
+                  PyGC_Head *from,
+                  PyGC_Head *to,
+                  int untrack,
+                  struct gc_collection_stats *stats)
+{
+    PyGC_Head unreachable; /* non-problematic unreachable trash */
+    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
+    PyGC_Head *gc;
+    GCState *gcstate = &tstate->interp->gc;
 
-    deduce_unreachable(young, &unreachable);
+    assert(gcstate->garbage != NULL);
+    assert(!_PyErr_Occurred(tstate));
 
-    untrack_tuples(young);
-    /* Move reachable objects to next generation. */
-    if (young != old) {
-        if (generation == NUM_GENERATIONS - 2) {
-            gcstate->long_lived_pending += gc_list_size(young);
-        }
-        gc_list_merge(young, old);
+    gc_list_init(&unreachable);
+    deduce_unreachable(from, &unreachable);
+    validate_consistent_old_space(from);
+    if (untrack & UNTRACK_TUPLES) {
+        untrack_tuples(from);
     }
-    else {
-        /* We only un-track dicts in full collections, to avoid quadratic
-           dict build-up. See issue #14775. */
-        untrack_dicts(young);
-        gcstate->long_lived_pending = 0;
-        gcstate->long_lived_total = gc_list_size(young);
+    if (untrack & UNTRACK_DICTS) {
+        untrack_dicts(from);
     }
+    validate_consistent_old_space(to);
+    if (from != to) {
+        gc_list_merge(from, to);
+    }
+    validate_consistent_old_space(to);
+    /* Move reachable objects to next generation. */
 
     /* All objects in unreachable are trash, but objects reachable from
      * legacy finalizers (e.g. tp_del) can't safely be deleted.
@@ -1380,10 +1489,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
      * and we move those into the finalizers list too.
      */
     move_legacy_finalizer_reachable(&finalizers);
-
     validate_list(&finalizers, collecting_clear_unreachable_clear);
     validate_list(&unreachable, collecting_set_unreachable_clear);
-
     /* Print debugging information. */
     if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
         for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
@@ -1392,89 +1499,99 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     }
 
     /* Clear weakrefs and invoke callbacks as necessary. */
-    m += handle_weakrefs(&unreachable, old);
-
-    validate_list(old, collecting_clear_unreachable_clear);
+    stats->collected += handle_weakrefs(&unreachable, to);
+    validate_list(to, collecting_clear_unreachable_clear);
     validate_list(&unreachable, collecting_set_unreachable_clear);
 
     /* Call tp_finalize on objects which have one. */
     finalize_garbage(tstate, &unreachable);
-
     /* Handle any objects that may have resurrected after the call
      * to 'finalize_garbage' and continue the collection with the
      * objects that are still unreachable */
     PyGC_Head final_unreachable;
-    handle_resurrected_objects(&unreachable, &final_unreachable, old);
+    gc_list_init(&final_unreachable);
+    handle_resurrected_objects(&unreachable, &final_unreachable, to);
 
     /* Call tp_clear on objects in the final_unreachable set.  This will cause
     * the reference cycles to be broken.  It may also cause some objects
     * in finalizers to be freed.
     */
-    m += gc_list_size(&final_unreachable);
-    delete_garbage(tstate, gcstate, &final_unreachable, old);
+    stats->collected += gc_list_size(&final_unreachable);
+    delete_garbage(tstate, gcstate, &final_unreachable, to);
 
     /* Collect statistics on uncollectable objects found and print
      * debugging information. */
+    Py_ssize_t n = 0;
     for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
         n++;
         if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
-    if (gcstate->debug & _PyGC_DEBUG_STATS) {
-        double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
-        PySys_WriteStderr(
-            "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
-            n+m, n, d);
-    }
-
+    stats->uncollectable = n;
     /* Append instances in the uncollectable set to a Python
      * reachable list of garbage.  The programmer has to deal with
      * this if they insist on creating this type of structure.
      */
-    handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
-    validate_list(old, collecting_clear_unreachable_clear);
+    handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
+    validate_list(to, collecting_clear_unreachable_clear);
+}
 
-    /* Clear free list only during the collection of the highest
-     * generation */
-    if (generation == NUM_GENERATIONS-1) {
-        _PyGC_ClearAllFreeLists(tstate->interp);
-    }
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+do_gc_callback(GCState *gcstate, const char *phase,
+                   int generation, struct gc_collection_stats *stats)
+{
+    assert(!PyErr_Occurred());
 
-    if (_PyErr_Occurred(tstate)) {
-        if (reason == _Py_GC_REASON_SHUTDOWN) {
-            _PyErr_Clear(tstate);
-        }
-        else {
-            PyErr_FormatUnraisable("Exception ignored in garbage collection");
+    /* The local variable cannot be rebound, check it for sanity */
+    assert(PyList_CheckExact(gcstate->callbacks));
+    PyObject *info = NULL;
+    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
+        info = Py_BuildValue("{sisnsn}",
+            "generation", generation,
+            "collected", stats->collected,
+            "uncollectable", stats->uncollectable);
+        if (info == NULL) {
+            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+            return;
         }
     }
 
-    /* Update stats */
-    struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
-    stats->collections++;
-    stats->collected += m;
-    stats->uncollectable += n;
-
-    GC_STAT_ADD(generation, objects_collected, m);
-#ifdef Py_STATS
-    if (_Py_stats) {
-        GC_STAT_ADD(generation, object_visits,
-            _Py_stats->object_stats.object_visits);
-        _Py_stats->object_stats.object_visits = 0;
+    PyObject *phase_obj = PyUnicode_FromString(phase);
+    if (phase_obj == NULL) {
+        Py_XDECREF(info);
+        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+        return;
     }
-#endif
 
-    if (PyDTrace_GC_DONE_ENABLED()) {
-        PyDTrace_GC_DONE(n + m);
+    PyObject *stack[] = {phase_obj, info};
+    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
+        Py_INCREF(cb); /* make sure cb doesn't go away */
+        r = PyObject_Vectorcall(cb, stack, 2, NULL);
+        if (r == NULL) {
+            PyErr_WriteUnraisable(cb);
+        }
+        else {
+            Py_DECREF(r);
+        }
+        Py_DECREF(cb);
     }
+    Py_DECREF(phase_obj);
+    Py_XDECREF(info);
+    assert(!PyErr_Occurred());
+}
 
-    if (reason != _Py_GC_REASON_SHUTDOWN) {
-        invoke_gc_callback(tstate, "stop", generation, m, n);
+static void
+invoke_gc_callback(GCState *gcstate, const char *phase,
+                   int generation, struct gc_collection_stats *stats)
+{
+    if (gcstate->callbacks == NULL) {
+        return;
     }
-
-    assert(!_PyErr_Occurred(tstate));
-    _Py_atomic_store_int(&gcstate->collecting, 0);
-    return n + m;
+    do_gc_callback(gcstate, phase, generation, stats);
 }
 
 static int
@@ -1549,7 +1666,7 @@ _PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
         }
     }
     else {
-        if (append_objects(result, GEN_HEAD(gcstate, generation))) {
+        if (append_objects(result, GEN_HEAD(gcstate, (int)generation))) {
             goto error;
         }
     }
@@ -1564,10 +1681,16 @@ void
 _PyGC_Freeze(PyInterpreterState *interp)
 {
     GCState *gcstate = &interp->gc;
-    for (int i = 0; i < NUM_GENERATIONS; ++i) {
-        gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
-        gcstate->generations[i].count = 0;
-    }
+    gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
+    gcstate->young.count = 0;
+    PyGC_Head *old0 = &gcstate->old[0].head;
+    PyGC_Head *old1 = &gcstate->old[1].head;
+    gc_list_merge(old0, &gcstate->permanent_generation.head);
+    gcstate->old[0].count = 0;
+    gc_list_set_space(old1, 0);
+    gc_list_merge(old1, &gcstate->permanent_generation.head);
+    gcstate->old[1].count = 0;
+    validate_old(gcstate);
 }
 
 void
@@ -1575,7 +1698,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
 {
     GCState *gcstate = &interp->gc;
     gc_list_merge(&gcstate->permanent_generation.head,
-                  GEN_HEAD(gcstate, NUM_GENERATIONS-1));
+                  &gcstate->old[0].head);
+    validate_old(gcstate);
 }
 
 Py_ssize_t
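
gc.freeze() now moves the young generation and both old spaces into the permanent generation. The typical use, as before, is to make memory pages copy-on-write friendly around fork() (an illustrative example):

    import gc
    import os

    gc.collect()       # collect existing garbage first
    gc.freeze()        # move all tracked objects to the permanent generation
    if os.fork() == 0:
        gc.unfreeze()  # let the child collect normally again
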
@@ -1611,32 +1735,100 @@ PyGC_IsEnabled(void)
     return gcstate->enabled;
 }
 
-/* Public API to invoke gc.collect() from C */
+// Show stats for objects in each generation
+static void
+show_stats_each_generations(GCState *gcstate)
+{
+    char buf[100];
+    size_t pos = 0;
+
+    for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
+        pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
+                             " %zd",
+                             gc_list_size(GEN_HEAD(gcstate, i)));
+    }
+
+    PySys_FormatStderr(
+        "gc: objects in each generation:%s\n"
+        "gc: objects in permanent generation: %zd\n",
+        buf, gc_list_size(&gcstate->permanent_generation.head));
+}
+
 Py_ssize_t
-PyGC_Collect(void)
+_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
     GCState *gcstate = &tstate->interp->gc;
 
-    if (!gcstate->enabled) {
+    int expected = 0;
+    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
+        // Don't start a garbage collection if one is already in progress.
         return 0;
     }
 
-    Py_ssize_t n;
+    struct gc_collection_stats stats = { 0 };
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(gcstate, "start", generation, &stats);
+    }
+    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
+    if (gcstate->debug & _PyGC_DEBUG_STATS) {
+        PySys_WriteStderr("gc: collecting generation %d...\n", generation);
+        show_stats_each_generations(gcstate);
+        t1 = _PyTime_GetPerfCounter();
+    }
+    if (PyDTrace_GC_START_ENABLED()) {
+        PyDTrace_GC_START(generation);
+    }
+    GC_STAT_ADD(generation, collections, 1);
     PyObject *exc = _PyErr_GetRaisedException(tstate);
-    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
+    switch(generation) {
+        case 0:
+            gc_collect_young(tstate, &stats);
+            break;
+        case 1:
+            gc_collect_young(tstate, &stats);
+            gc_collect_increment(tstate, &stats);
+            break;
+        case 2:
+            gc_collect_full(tstate, &stats);
+            break;
+        default:
+            Py_UNREACHABLE();
+    }
+    if (PyDTrace_GC_DONE_ENABLED()) {
+        PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
+    }
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(gcstate, "stop", generation, &stats);
+    }
     _PyErr_SetRaisedException(tstate, exc);
+    GC_STAT_ADD(generation, objects_collected, stats.collected);
+#ifdef Py_STATS
+    if (_Py_stats) {
+        GC_STAT_ADD(generation, object_visits,
+            _Py_stats->object_stats.object_visits);
+        _Py_stats->object_stats.object_visits = 0;
+    }
+#endif
+    validate_old(gcstate);
+    if (gcstate->debug & _PyGC_DEBUG_STATS) {
+        double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
+        PySys_WriteStderr(
+            "gc: done, %zd collected, %zd uncollectable, %.4fs elapsed\n",
+            stats.collected, stats.uncollectable, d);
+    }
 
-    return n;
+    _Py_atomic_store_int(&gcstate->collecting, 0);
+    return stats.uncollectable + stats.collected;
 }
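
Seen from Python, the generation argument now selects one of the three entry points above (expected mapping per the switch statement):

    import gc

    gc.collect(0)   # young generation only (gc_collect_young)
    gc.collect(1)   # young generation plus one increment of the old
                    # generation (gc_collect_increment)
    gc.collect(2)   # the whole heap (gc_collect_full); this is also
                    # what gc.collect() with no argument does
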
 
+/* Public API to invoke gc.collect() from C */
 Py_ssize_t
-_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+PyGC_Collect(void)
 {
-    return gc_collect_main(tstate, generation, reason);
+    return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL);
 }
 
-Py_ssize_t
+void
 _PyGC_CollectNoFail(PyThreadState *tstate)
 {
     /* Ideally, this function is only called on interpreter shutdown,
@@ -1645,7 +1837,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
+    _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN);
 }
 
 void
@@ -1780,10 +1972,10 @@ _PyObject_GC_Link(PyObject *op)
     GCState *gcstate = &tstate->interp->gc;
     g->_gc_next = 0;
     g->_gc_prev = 0;
-    gcstate->generations[0].count++; /* number of allocated GC objects */
-    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
+    gcstate->young.count++; /* number of allocated GC objects */
+    if (gcstate->young.count > gcstate->young.threshold &&
         gcstate->enabled &&
-        gcstate->generations[0].threshold &&
+        gcstate->young.threshold &&
         !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
         !_PyErr_Occurred(tstate))
     {
@@ -1794,7 +1986,9 @@ _PyObject_GC_Link(PyObject *op)
 void
 _Py_RunGC(PyThreadState *tstate)
 {
-    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
+    if (tstate->interp->gc.enabled) {
+        _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP);
+    }
 }
 
 static PyObject *
@@ -1897,8 +2091,8 @@ PyObject_GC_Del(void *op)
 #endif
     }
     GCState *gcstate = get_gc_state();
-    if (gcstate->generations[0].count > 0) {
-        gcstate->generations[0].count--;
+    if (gcstate->young.count > 0) {
+        gcstate->young.count--;
     }
     PyObject_Free(((char *)op)-presize);
 }
@@ -1921,26 +2115,36 @@ PyObject_GC_IsFinalized(PyObject *obj)
     return 0;
 }
 
+static int
+visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
+{
+    PyGC_Head *gc_list, *gc;
+    gc_list = &gen->head;
+    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
+        PyObject *op = FROM_GC(gc);
+        Py_INCREF(op);
+        int res = callback(op, arg);
+        Py_DECREF(op);
+        if (!res) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 void
 PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
 {
-    size_t i;
     GCState *gcstate = get_gc_state();
     int origenstate = gcstate->enabled;
     gcstate->enabled = 0;
-    for (i = 0; i < NUM_GENERATIONS; i++) {
-        PyGC_Head *gc_list, *gc;
-        gc_list = GEN_HEAD(gcstate, i);
-        for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
-            PyObject *op = FROM_GC(gc);
-            Py_INCREF(op);
-            int res = callback(op, arg);
-            Py_DECREF(op);
-            if (!res) {
-                goto done;
-            }
-        }
+    if (visit_generation(callback, arg, &gcstate->young)) {
+        goto done;
+    }
+    if (visit_generation(callback, arg, &gcstate->old[0])) {
+        goto done;
     }
+    visit_generation(callback, arg, &gcstate->old[1]);
 done:
     gcstate->enabled = origenstate;
 }
index 8fbcdb15109b76c747a6733166cf0957de7f465b..1c4da726866e4e117af7068357dbec4dee94a84d 100644 (file)
@@ -616,7 +616,7 @@ void
 _PyGC_InitState(GCState *gcstate)
 {
     // TODO: move to pycore_runtime_init.h once the incremental GC lands.
-    gcstate->generations[0].threshold = 2000;
+    gcstate->young.threshold = 2000;
 }
 
 
@@ -911,8 +911,8 @@ cleanup_worklist(struct worklist *worklist)
 static bool
 gc_should_collect(GCState *gcstate)
 {
-    int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
-    int threshold = gcstate->generations[0].threshold;
+    int count = _Py_atomic_load_int_relaxed(&gcstate->young.count);
+    int threshold = gcstate->young.threshold;
     if (count <= threshold || threshold == 0 || !gcstate->enabled) {
         return false;
     }
@@ -920,7 +920,7 @@ gc_should_collect(GCState *gcstate)
     // objects. A few tests rely on immediate scheduling of the GC so we ignore
     // the scaled threshold if generations[1].threshold is set to zero.
     return (count > gcstate->long_lived_total / 4 ||
-            gcstate->generations[1].threshold == 0);
+            gcstate->old[0].threshold == 0);
 }
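
For the free-threaded build, the trigger condition above can be modeled as follows (a hypothetical sketch of gc_should_collect()):

    def should_collect(count, threshold, long_lived_total,
                       old_threshold, enabled=True):
        # Hypothetical model of gc_should_collect() above.
        if count <= threshold or threshold == 0 or not enabled:
            return False
        # Avoid quadratic behavior on large heaps: also require the young
        # count to exceed a quarter of the long-lived objects, unless
        # tests force immediate collection by zeroing the old threshold.
        return count > long_lived_total // 4 or old_threshold == 0
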
 
 static void
@@ -1031,10 +1031,15 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     /* update collection and allocation counters */
     if (generation+1 < NUM_GENERATIONS) {
-        gcstate->generations[generation+1].count += 1;
+        gcstate->old[generation].count += 1;
     }
     for (i = 0; i <= generation; i++) {
-        gcstate->generations[i].count = 0;
+        if (i == 0) {
+            gcstate->young.count = 0;
+        }
+        else {
+            gcstate->old[i-1].count = 0;
+        }
     }
 
     PyInterpreterState *interp = tstate->interp;
@@ -1357,7 +1362,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     return gc_collect_main(tstate, generation, reason);
 }
 
-Py_ssize_t
+void
 _PyGC_CollectNoFail(PyThreadState *tstate)
 {
     /* Ideally, this function is only called on interpreter shutdown,
@@ -1366,7 +1371,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
+    gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
 }
 
 void
@@ -1490,7 +1495,7 @@ _PyObject_GC_Link(PyObject *op)
 {
     PyThreadState *tstate = _PyThreadState_GET();
     GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++;
+    gcstate->young.count++;
 
     if (gc_should_collect(gcstate) &&
         !_Py_atomic_load_int_relaxed(&gcstate->collecting))
@@ -1605,8 +1610,8 @@ PyObject_GC_Del(void *op)
 #endif
     }
     GCState *gcstate = get_gc_state();
-    if (gcstate->generations[0].count > 0) {
-        gcstate->generations[0].count--;
+    if (gcstate->young.count > 0) {
+        gcstate->young.count--;
     }
     PyObject_Free(((char *)op)-presize);
 }
index 2fd0c08a6bb5aec34e6452563ec652d0c83d06b6..dfc5ec1f2f292725586812dbacd121ac7815f4ff 100644 (file)
@@ -1030,7 +1030,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
     if (!already_set) {
         /* We assume that all module defs are statically allocated
            and will never be freed.  Otherwise, we would incref here. */
-        _Py_SetImmortal(def);
+        _Py_SetImmortal((PyObject *)def);
     }
     res = 0;
 
index 483f28b46dfec742ea506747f02f37a941604a9e..96b891481d9f4628048860186c9253c8a40a1ac0 100755 (executable)
@@ -1753,8 +1753,11 @@ class Frame(object):
             return (name == 'take_gil')
 
     def is_gc_collect(self):
-        '''Is this frame gc_collect_main() within the garbage-collector?'''
-        return self._gdbframe.name() in ('collect', 'gc_collect_main')
+        '''Is this frame a collector within the garbage-collector?'''
+        return self._gdbframe.name() in (
+            'collect', 'gc_collect_full', 'gc_collect_main',
+            'gc_collect_young', 'gc_collect_increment'
+        )
 
     def get_pyop(self):
         try: