]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-117108: Set the "old space bit" to "visited" for all young objects (#117213)
authorMark Shannon <mark@hotpy.org>
Tue, 26 Mar 2024 11:11:42 +0000 (11:11 +0000)
committerGitHub <noreply@github.com>
Tue, 26 Mar 2024 11:11:42 +0000 (11:11 +0000)
Change old space bit of young objects from 0 to gcstate->visited_space.
This ensures that any object created *and* collected during cycle GC has the bit set correctly.

Include/internal/pycore_gc.h
Include/internal/pycore_object.h
Lib/test/test_gc.py
Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst [new file with mode: 0644]
Python/gc.c

index e729616936f03b071366966c9f9d00a385ecd04f..c4482c4ffcfa60ce83d381753ee2da5fca5d6721 100644 (file)
@@ -113,7 +113,19 @@ static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
 /* Bit 1 is set when the object is in generation which is GCed currently. */
 #define _PyGC_PREV_MASK_COLLECTING 2
 
-/* Bit 0 is set if the object belongs to old space 1 */
+/* Bit 0 in _gc_next is the old space bit.
+ * It is set as follows:
+ * Young: gcstate->visited_space
+ * old[0]: 0
+ * old[1]: 1
+ * permanent: 0
+ *
+ * During a collection all objects handled should have the bit set to
+ * gcstate->visited_space, as objects are moved from the young gen
+ * and the increment into old[gcstate->visited_space].
+ * When object are moved from the pending space, old[gcstate->visited_space^1]
+ * into the increment, the old space bit is flipped.
+*/
 #define _PyGC_NEXT_MASK_OLD_SPACE_1    1
 
 #define _PyGC_PREV_SHIFT           2
index 13fe543133f11e0cbc4eb43ccd78ef8881fca2e0..0b17ddf0c973efa5de296a8ef651fb5c19bc37b7 100644 (file)
@@ -318,8 +318,8 @@ static inline void _PyObject_GC_TRACK(
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
-    _PyGCHead_SET_NEXT(gc, generation0);
-    assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0);
+    /* Young objects will be moved into the visited space during GC, so set the bit here */
+    gc->_gc_next = ((uintptr_t)generation0) | interp->gc.visited_space;
     generation0->_gc_prev = (uintptr_t)gc;
 #endif
 }
index 57acbac5859e7f481c323f9b171229e316c60d27..3bf5c9ed41ee44b8093e3b405de00a5a0c2a316a 100644 (file)
@@ -823,32 +823,10 @@ class GCTests(unittest.TestCase):
         self.assertTrue(
                 any(l is element for element in gc.get_objects(generation=0))
         )
-        self.assertFalse(
-                any(l is element for element in  gc.get_objects(generation=1))
-        )
-        self.assertFalse(
-                any(l is element for element in gc.get_objects(generation=2))
-        )
-        gc.collect(generation=0)
-        self.assertFalse(
-                any(l is element for element in gc.get_objects(generation=0))
-        )
-        self.assertTrue(
-                any(l is element for element in  gc.get_objects(generation=1))
-        )
-        self.assertFalse(
-                any(l is element for element in gc.get_objects(generation=2))
-        )
-        gc.collect(generation=2)
+        gc.collect()
         self.assertFalse(
                 any(l is element for element in gc.get_objects(generation=0))
         )
-        self.assertFalse(
-                any(l is element for element in  gc.get_objects(generation=1))
-        )
-        self.assertTrue(
-                any(l is element for element in gc.get_objects(generation=2))
-        )
         del l
         gc.collect()
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst
new file mode 100644 (file)
index 0000000..a28c83e
--- /dev/null
@@ -0,0 +1,3 @@
+Change the old space bit of objects in the young generation from 0 to
+gcstate->visited, so that any objects created during GC will have the old
+bit set correctly if they get moved into the old generation.
index 6d86059174a8cd6750bbc95e114f4cc2a9a51b8c..36e20d05c205a5f2ff1217d72e23dfe3cdd10468 100644 (file)
@@ -455,10 +455,20 @@ validate_consistent_old_space(PyGC_Head *head)
     assert(prev == GC_PREV(head));
 }
 
+static void
+gc_list_validate_space(PyGC_Head *head, int space) {
+    PyGC_Head *gc = GC_NEXT(head);
+    while (gc != head) {
+        assert(gc_old_space(gc) == space);
+        gc = GC_NEXT(gc);
+    }
+}
+
 #else
 #define validate_list(x, y) do{}while(0)
 #define validate_old(g) do{}while(0)
 #define validate_consistent_old_space(l) do{}while(0)
+#define gc_list_validate_space(l, s) do{}while(0)
 #endif
 
 /*** end of list stuff ***/
@@ -949,6 +959,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
     /* Invoke the callbacks we decided to honor.  It's safe to invoke them
      * because they can't reference unreachable objects.
      */
+    int visited_space = get_gc_state()->visited_space;
     while (! gc_list_is_empty(&wrcb_to_call)) {
         PyObject *temp;
         PyObject *callback;
@@ -983,6 +994,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
         Py_DECREF(op);
         if (wrcb_to_call._gc_next == (uintptr_t)gc) {
             /* object is still alive -- move it */
+            gc_set_old_space(gc, visited_space);
             gc_list_move(gc, old);
         }
         else {
@@ -1389,6 +1401,14 @@ completed_cycle(GCState *gcstate)
     assert(gc_list_is_empty(not_visited));
 #endif
     gcstate->visited_space = flip_old_space(gcstate->visited_space);
+    /* Make sure all young objects have old space bit set correctly */
+    PyGC_Head *young = &gcstate->young.head;
+    PyGC_Head *gc = GC_NEXT(young);
+    while (gc != young) {
+        PyGC_Head *next = GC_NEXT(gc);
+        gc_set_old_space(gc, gcstate->visited_space);
+        gc = next;
+    }
     gcstate->work_to_do = 0;
 }
 
@@ -1406,10 +1426,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     }
     gc_list_merge(&gcstate->young.head, &increment);
     gcstate->young.count = 0;
-    if (gcstate->visited_space) {
-        /* objects in visited space have bit set, so we set it here */
-        gc_list_set_space(&increment, 1);
-    }
+    gc_list_validate_space(&increment, gcstate->visited_space);
     Py_ssize_t increment_size = 0;
     while (increment_size < gcstate->work_to_do) {
         if (gc_list_is_empty(not_visited)) {
@@ -1421,9 +1438,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
         gc_set_old_space(gc, gcstate->visited_space);
         increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
     }
+    gc_list_validate_space(&increment, gcstate->visited_space);
     PyGC_Head survivors;
     gc_list_init(&survivors);
     gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
+    gc_list_validate_space(&survivors, gcstate->visited_space);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
     gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
@@ -1444,23 +1463,18 @@ gc_collect_full(PyThreadState *tstate,
     GCState *gcstate = &tstate->interp->gc;
     validate_old(gcstate);
     PyGC_Head *young = &gcstate->young.head;
-    PyGC_Head *old0 = &gcstate->old[0].head;
-    PyGC_Head *old1 = &gcstate->old[1].head;
-    /* merge all generations into old0 */
-    gc_list_merge(young, old0);
+    PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
+    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+    /* merge all generations into visited */
+    gc_list_validate_space(young, gcstate->visited_space);
+    gc_list_set_space(pending, gcstate->visited_space);
+    gc_list_merge(young, pending);
     gcstate->young.count = 0;
-    PyGC_Head *gc = GC_NEXT(old1);
-    while (gc != old1) {
-        PyGC_Head *next = GC_NEXT(gc);
-        gc_set_old_space(gc, 0);
-        gc = next;
-    }
-    gc_list_merge(old1, old0);
+    gc_list_merge(pending, visited);
 
-    gc_collect_region(tstate, old0, old0,
+    gc_collect_region(tstate, visited, visited,
                       UNTRACK_TUPLES | UNTRACK_DICTS,
                       stats);
-    gcstate->visited_space = 1;
     gcstate->young.count = 0;
     gcstate->old[0].count = 0;
     gcstate->old[1].count = 0;
@@ -1527,6 +1541,7 @@ gc_collect_region(PyThreadState *tstate,
 
     /* Clear weakrefs and invoke callbacks as necessary. */
     stats->collected += handle_weakrefs(&unreachable, to);
+    gc_list_validate_space(to, gcstate->visited_space);
     validate_list(to, collecting_clear_unreachable_clear);
     validate_list(&unreachable, collecting_set_unreachable_clear);
 
@@ -1560,6 +1575,7 @@ gc_collect_region(PyThreadState *tstate,
      * this if they insist on creating this type of structure.
      */
     handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
+    gc_list_validate_space(to, gcstate->visited_space);
     validate_list(to, collecting_clear_unreachable_clear);
 }
 
@@ -1708,6 +1724,10 @@ void
 _PyGC_Freeze(PyInterpreterState *interp)
 {
     GCState *gcstate = &interp->gc;
+    /* The permanent_generation has its old space bit set to zero */
+    if (gcstate->visited_space) {
+        gc_list_set_space(&gcstate->young.head, 0);
+    }
     gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
     gcstate->young.count = 0;
     PyGC_Head*old0 = &gcstate->old[0].head;