gh-112529: Use atomic operations for `gcstate->collecting` (#112533)
author     Sam Gross <colesbury@gmail.com>
           Mon, 11 Dec 2023 18:33:21 +0000 (13:33 -0500)
committer  GitHub <noreply@github.com>
           Mon, 11 Dec 2023 18:33:21 +0000 (18:33 +0000)
* gh-112529: Use atomic operations for `gcstate->collecting`

The `collecting` field in `GCState` is used to prevent overlapping garbage
collections within the same interpreter. It is now updated with atomic
operations so that it is thread-safe in `--disable-gil` builds.
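
As an illustration only (not part of this change), the same single-collector
guard can be written with standard C11 atomics; CPython itself uses its
internal `_Py_atomic_*` helpers, as the diff below shows:

```c
/* Illustrative sketch, not CPython code: the "only one collection at a
 * time" guard written with C11 <stdatomic.h> instead of the internal
 * _Py_atomic_* helpers used in the diff below. */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int collecting;

static bool
try_begin_collection(void)
{
    int expected = 0;
    /* Exactly one thread wins the 0 -> 1 transition; the others see a
     * collection already in progress and back off. */
    return atomic_compare_exchange_strong(&collecting, &expected, 1);
}

static void
end_collection(void)
{
    /* Reset the flag so the next collection can start. */
    atomic_store(&collecting, 0);
}
```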

The GC code is refactored a bit to support this. More of the logic is pushed
down into `gc_collect_main()` so that setting `collecting`, selecting the
generation, and invoking the callbacks can be ordered safely with respect to
the atomic operations and the (future) stop-the-world pauses.
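
Concretely, the refactored `gc_collect_main()` ends up shaped roughly as
follows (condensed from the diff below; stats, debug output, error handling,
and the collection work itself are elided):

```c
/* Condensed outline of the refactored gc_collect_main(); see the full
 * diff below for the complete version. */
static Py_ssize_t
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{
    GCState *gcstate = &tstate->interp->gc;
    Py_ssize_t m = 0;   /* # objects collected */
    Py_ssize_t n = 0;   /* # unreachable objects that couldn't be collected */

    int expected = 0;
    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
        return 0;       /* another collection is already in progress */
    }

    if (generation == GENERATION_AUTO) {
        generation = gc_select_generation(gcstate);   /* threshold check */
        if (generation < 0) {
            _Py_atomic_store_int(&gcstate->collecting, 0);
            return 0;   /* no generation exceeded its threshold */
        }
    }

    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(tstate, "start", generation, 0, 0);
    }

    /* ... collect `generation` and all younger generations,
     *     updating m and n ... */

    if (reason != _Py_GC_REASON_SHUTDOWN) {
        invoke_gc_callback(tstate, "stop", generation, m, n);
    }

    _Py_atomic_store_int(&gcstate->collecting, 0);
    return n + m;
}
```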

The change uses atomic operations for both `--disable-gil` and the default
build (with the GIL) to avoid extra `#ifdef` guards and ease the maintenance
burden.
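
For comparison, keeping a plain field in the default build would have meant
wrapping accesses in build-specific guards roughly like this (hypothetical
sketch, assuming `Py_GIL_DISABLED` as the macro defined by `--disable-gil`
builds; the `GC_STORE_COLLECTING` macro is made up for illustration):

```c
/* Hypothetical alternative that the change avoids: dispatching on the
 * free-threaded build with per-access #ifdef guards. */
#ifdef Py_GIL_DISABLED
#  define GC_STORE_COLLECTING(gcstate, v) \
       _Py_atomic_store_int(&(gcstate)->collecting, (v))
#else
#  define GC_STORE_COLLECTING(gcstate, v) \
       ((void)((gcstate)->collecting = (v)))
#endif
```

Calling the atomic helpers unconditionally keeps a single code path in both
builds.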

Modules/gcmodule.c

index 8233fc56159b600a6abcbc16f03f4a3593041c16..2d1f381e622226716b9299d864bb28c11f3abb47 100644
@@ -74,6 +74,20 @@ module gc
 #define AS_GC(op) _Py_AS_GC(op)
 #define FROM_GC(gc) _Py_FROM_GC(gc)
 
+// Automatically choose the generation that needs collecting.
+#define GENERATION_AUTO (-1)
+
+typedef enum {
+    // GC was triggered by heap allocation
+    _Py_GC_REASON_HEAP,
+
+    // GC was called during shutdown
+    _Py_GC_REASON_SHUTDOWN,
+
+    // GC was called by gc.collect() or PyGC_Collect()
+    _Py_GC_REASON_MANUAL
+} _PyGC_Reason;
+
 
 static inline int
 gc_is_collecting(PyGC_Head *g)
@@ -1194,19 +1208,122 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
     gc_list_merge(resurrected, old_generation);
 }
 
+
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+invoke_gc_callback(PyThreadState *tstate, const char *phase,
+                   int generation, Py_ssize_t collected,
+                   Py_ssize_t uncollectable)
+{
+    assert(!_PyErr_Occurred(tstate));
+
+    /* we may get called very early */
+    GCState *gcstate = &tstate->interp->gc;
+    if (gcstate->callbacks == NULL) {
+        return;
+    }
+
+    /* The local variable cannot be rebound, check it for sanity */
+    assert(PyList_CheckExact(gcstate->callbacks));
+    PyObject *info = NULL;
+    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
+        info = Py_BuildValue("{sisnsn}",
+            "generation", generation,
+            "collected", collected,
+            "uncollectable", uncollectable);
+        if (info == NULL) {
+            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+            return;
+        }
+    }
+
+    PyObject *phase_obj = PyUnicode_FromString(phase);
+    if (phase_obj == NULL) {
+        Py_XDECREF(info);
+        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+        return;
+    }
+
+    PyObject *stack[] = {phase_obj, info};
+    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
+        Py_INCREF(cb); /* make sure cb doesn't go away */
+        r = PyObject_Vectorcall(cb, stack, 2, NULL);
+        if (r == NULL) {
+            PyErr_WriteUnraisable(cb);
+        }
+        else {
+            Py_DECREF(r);
+        }
+        Py_DECREF(cb);
+    }
+    Py_DECREF(phase_obj);
+    Py_XDECREF(info);
+    assert(!_PyErr_Occurred(tstate));
+}
+
+
+/* Find the oldest generation (highest numbered) where the count
+ * exceeds the threshold.  Objects in that generation and
+ * generations younger than it will be collected. */
+static int
+gc_select_generation(GCState *gcstate)
+{
+    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
+        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
+            /* Avoid quadratic performance degradation in number
+               of tracked objects (see also issue #4074):
+
+               To limit the cost of garbage collection, there are two strategies;
+                 - make each collection faster, e.g. by scanning fewer objects
+                 - do less collections
+               This heuristic is about the latter strategy.
+
+               In addition to the various configurable thresholds, we only trigger a
+               full collection if the ratio
+
+                long_lived_pending / long_lived_total
+
+               is above a given value (hardwired to 25%).
+
+               The reason is that, while "non-full" collections (i.e., collections of
+               the young and middle generations) will always examine roughly the same
+               number of objects -- determined by the aforementioned thresholds --,
+               the cost of a full collection is proportional to the total number of
+               long-lived objects, which is virtually unbounded.
+
+               Indeed, it has been remarked that doing a full collection every
+               <constant number> of object creations entails a dramatic performance
+               degradation in workloads which consist in creating and storing lots of
+               long-lived objects (e.g. building a large list of GC-tracked objects would
+               show quadratic performance, instead of linear as expected: see issue #4074).
+
+               Using the above ratio, instead, yields amortized linear performance in
+               the total number of objects (the effect of which can be summarized
+               thusly: "each full garbage collection is more and more costly as the
+               number of objects grows, but we do fewer and fewer of them").
+
+               This heuristic was suggested by Martin von Löwis on python-dev in
+               June 2008. His original analysis and proposal can be found at:
+               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+            */
+            if (i == NUM_GENERATIONS - 1
+                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
+                continue;
+            return i;
+        }
+    }
+    return -1;
+}
+
+
 /* This is the main function.  Read this to understand how the
  * collection process works. */
 static Py_ssize_t
-gc_collect_main(PyThreadState *tstate, int generation,
-                Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
-                int nofail)
+gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 {
-    GC_STAT_ADD(generation, collections, 1);
-#ifdef Py_STATS
-    if (_Py_stats) {
-        _Py_stats->object_stats.object_visits = 0;
-    }
-#endif
     int i;
     Py_ssize_t m = 0; /* # objects collected */
     Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
@@ -1223,6 +1340,36 @@ gc_collect_main(PyThreadState *tstate, int generation,
     assert(gcstate->garbage != NULL);
     assert(!_PyErr_Occurred(tstate));
 
+    int expected = 0;
+    if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
+        // Don't start a garbage collection if one is already in progress.
+        return 0;
+    }
+
+    if (generation == GENERATION_AUTO) {
+        // Select the oldest generation that needs collecting. We will collect
+        // objects from that generation and all generations younger than it.
+        generation = gc_select_generation(gcstate);
+        if (generation < 0) {
+            // No generation needs to be collected.
+            _Py_atomic_store_int(&gcstate->collecting, 0);
+            return 0;
+        }
+    }
+
+    assert(generation >= 0 && generation < NUM_GENERATIONS);
+
+#ifdef Py_STATS
+    if (_Py_stats) {
+        _Py_stats->object_stats.object_visits = 0;
+    }
+#endif
+    GC_STAT_ADD(generation, collections, 1);
+
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(tstate, "start", generation, 0, 0);
+    }
+
     if (gcstate->debug & DEBUG_STATS) {
         PySys_WriteStderr("gc: collecting generation %d...\n", generation);
         show_stats_each_generations(gcstate);
@@ -1342,7 +1489,7 @@ gc_collect_main(PyThreadState *tstate, int generation,
     }
 
     if (_PyErr_Occurred(tstate)) {
-        if (nofail) {
+        if (reason == _Py_GC_REASON_SHUTDOWN) {
             _PyErr_Clear(tstate);
         }
         else {
@@ -1351,13 +1498,6 @@ gc_collect_main(PyThreadState *tstate, int generation,
     }
 
     /* Update stats */
-    if (n_collected) {
-        *n_collected = m;
-    }
-    if (n_uncollectable) {
-        *n_uncollectable = n;
-    }
-
     struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
     stats->collections++;
     stats->collected += m;
@@ -1376,134 +1516,13 @@ gc_collect_main(PyThreadState *tstate, int generation,
         PyDTrace_GC_DONE(n + m);
     }
 
-    assert(!_PyErr_Occurred(tstate));
-    return n + m;
-}
-
-/* Invoke progress callbacks to notify clients that garbage collection
- * is starting or stopping
- */
-static void
-invoke_gc_callback(PyThreadState *tstate, const char *phase,
-                   int generation, Py_ssize_t collected,
-                   Py_ssize_t uncollectable)
-{
-    assert(!_PyErr_Occurred(tstate));
-
-    /* we may get called very early */
-    GCState *gcstate = &tstate->interp->gc;
-    if (gcstate->callbacks == NULL) {
-        return;
-    }
-
-    /* The local variable cannot be rebound, check it for sanity */
-    assert(PyList_CheckExact(gcstate->callbacks));
-    PyObject *info = NULL;
-    if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
-        info = Py_BuildValue("{sisnsn}",
-            "generation", generation,
-            "collected", collected,
-            "uncollectable", uncollectable);
-        if (info == NULL) {
-            PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
-            return;
-        }
+    if (reason != _Py_GC_REASON_SHUTDOWN) {
+        invoke_gc_callback(tstate, "stop", generation, m, n);
     }
 
-    PyObject *phase_obj = PyUnicode_FromString(phase);
-    if (phase_obj == NULL) {
-        Py_XDECREF(info);
-        PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
-        return;
-    }
-
-    PyObject *stack[] = {phase_obj, info};
-    for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
-        Py_INCREF(cb); /* make sure cb doesn't go away */
-        r = PyObject_Vectorcall(cb, stack, 2, NULL);
-        if (r == NULL) {
-            PyErr_WriteUnraisable(cb);
-        }
-        else {
-            Py_DECREF(r);
-        }
-        Py_DECREF(cb);
-    }
-    Py_DECREF(phase_obj);
-    Py_XDECREF(info);
-    assert(!_PyErr_Occurred(tstate));
-}
-
-/* Perform garbage collection of a generation and invoke
- * progress callbacks.
- */
-static Py_ssize_t
-gc_collect_with_callback(PyThreadState *tstate, int generation)
-{
     assert(!_PyErr_Occurred(tstate));
-    Py_ssize_t result, collected, uncollectable;
-    invoke_gc_callback(tstate, "start", generation, 0, 0);
-    result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
-    invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
-    assert(!_PyErr_Occurred(tstate));
-    return result;
-}
-
-static Py_ssize_t
-gc_collect_generations(PyThreadState *tstate)
-{
-    GCState *gcstate = &tstate->interp->gc;
-    /* Find the oldest generation (highest numbered) where the count
-     * exceeds the threshold.  Objects in the that generation and
-     * generations younger than it will be collected. */
-    Py_ssize_t n = 0;
-    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
-            /* Avoid quadratic performance degradation in number
-               of tracked objects (see also issue #4074):
-
-               To limit the cost of garbage collection, there are two strategies;
-                 - make each collection faster, e.g. by scanning fewer objects
-                 - do less collections
-               This heuristic is about the latter strategy.
-
-               In addition to the various configurable thresholds, we only trigger a
-               full collection if the ratio
-
-                long_lived_pending / long_lived_total
-
-               is above a given value (hardwired to 25%).
-
-               The reason is that, while "non-full" collections (i.e., collections of
-               the young and middle generations) will always examine roughly the same
-               number of objects -- determined by the aforementioned thresholds --,
-               the cost of a full collection is proportional to the total number of
-               long-lived objects, which is virtually unbounded.
-
-               Indeed, it has been remarked that doing a full collection every
-               <constant number> of object creations entails a dramatic performance
-               degradation in workloads which consist in creating and storing lots of
-               long-lived objects (e.g. building a large list of GC-tracked objects would
-               show quadratic performance, instead of linear as expected: see issue #4074).
-
-               Using the above ratio, instead, yields amortized linear performance in
-               the total number of objects (the effect of which can be summarized
-               thusly: "each full garbage collection is more and more costly as the
-               number of objects grows, but we do fewer and fewer of them").
-
-               This heuristic was suggested by Martin von Löwis on python-dev in
-               June 2008. His original analysis and proposal can be found at:
-               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-            */
-            if (i == NUM_GENERATIONS - 1
-                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
-                continue;
-            n = gc_collect_with_callback(tstate, i);
-            break;
-        }
-    }
-    return n;
+    _Py_atomic_store_int(&gcstate->collecting, 0);
+    return n + m;
 }
 
 #include "clinic/gcmodule.c.h"
@@ -1574,18 +1593,7 @@ gc_collect_impl(PyObject *module, int generation)
         return -1;
     }
 
-    GCState *gcstate = &tstate->interp->gc;
-    Py_ssize_t n;
-    if (gcstate->collecting) {
-        /* already collecting, don't do anything */
-        n = 0;
-    }
-    else {
-        gcstate->collecting = 1;
-        n = gc_collect_with_callback(tstate, generation);
-        gcstate->collecting = 0;
-    }
-    return n;
+    return gc_collect_main(tstate, generation, _Py_GC_REASON_MANUAL);
 }
 
 /*[clinic input]
@@ -2124,17 +2132,9 @@ PyGC_Collect(void)
     }
 
     Py_ssize_t n;
-    if (gcstate->collecting) {
-        /* already collecting, don't do anything */
-        n = 0;
-    }
-    else {
-        gcstate->collecting = 1;
-        PyObject *exc = _PyErr_GetRaisedException(tstate);
-        n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
-        _PyErr_SetRaisedException(tstate, exc);
-        gcstate->collecting = 0;
-    }
+    PyObject *exc = _PyErr_GetRaisedException(tstate);
+    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
+    _PyErr_SetRaisedException(tstate, exc);
 
     return n;
 }
@@ -2148,16 +2148,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    GCState *gcstate = &tstate->interp->gc;
-    if (gcstate->collecting) {
-        return 0;
-    }
-
-    Py_ssize_t n;
-    gcstate->collecting = 1;
-    n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
-    gcstate->collecting = 0;
-    return n;
+    return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
 }
 
 void
@@ -2275,10 +2266,6 @@ PyObject_IS_GC(PyObject *obj)
 void
 _Py_ScheduleGC(PyInterpreterState *interp)
 {
-    GCState *gcstate = &interp->gc;
-    if (gcstate->collecting == 1) {
-        return;
-    }
     _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
 }
 
@@ -2296,7 +2283,7 @@ _PyObject_GC_Link(PyObject *op)
     if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
         gcstate->enabled &&
         gcstate->generations[0].threshold &&
-        !gcstate->collecting &&
+        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
         !_PyErr_Occurred(tstate))
     {
         _Py_ScheduleGC(tstate->interp);
@@ -2306,10 +2293,7 @@ _PyObject_GC_Link(PyObject *op)
 void
 _Py_RunGC(PyThreadState *tstate)
 {
-    GCState *gcstate = &tstate->interp->gc;
-    gcstate->collecting = 1;
-    gc_collect_generations(tstate);
-    gcstate->collecting = 0;
+    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
 }
 
 static PyObject *