]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-148726: Add heap_size to generational GC (#149195)
authorSergey Miryanov <sergey.miryanov@gmail.com>
Mon, 4 May 2026 21:14:45 +0000 (02:14 +0500)
committerGitHub <noreply@github.com>
Mon, 4 May 2026 21:14:45 +0000 (22:14 +0100)
Include/internal/pycore_gc.h
Include/internal/pycore_interp_structs.h
Lib/test/test_gc.py
Lib/test/test_gc_stats.py
Modules/_remote_debugging/clinic/module.c.h
Modules/_remote_debugging/gc_stats.c
Modules/_remote_debugging/module.c
Modules/_testinternalcapi.c
Python/gc.c

index e105677cd2e674a4a84cf6a2ef60f4ee67495782..bfe52f42f1141ccb7a33bdb6cb6e6d011e7d5e9e 100644 (file)
@@ -223,12 +223,14 @@ static inline void _PyObject_GC_TRACK(
                           "object is in generation which is garbage collected",
                           filename, lineno, __func__);
 
-    PyGC_Head *generation0 = _PyInterpreterState_GET()->gc.generation0;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    PyGC_Head *generation0 = gcstate->generation0;
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
     _PyGCHead_SET_NEXT(gc, generation0);
     generation0->_gc_prev = (uintptr_t)gc;
+    gcstate->heap_size++;
 #endif
 }
 
@@ -263,6 +265,8 @@ static inline void _PyObject_GC_UNTRACK(
     _PyGCHead_SET_PREV(next, prev);
     gc->_gc_next = 0;
     gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    gcstate->heap_size--;
 #endif
 }
 
index 86f018e328656e217b8a624b53997570f27277bc..2d04c173e85abe0526dda01753d4fb8d26a5a321 100644 (file)
@@ -191,6 +191,8 @@ struct gc_generation_stats {
     Py_ssize_t candidates;
     // Total duration of the collection in seconds:
     double duration;
+    /* heap_size at the start of the collection */
+    Py_ssize_t heap_size;
 };
 
 #ifdef Py_GIL_DISABLED
@@ -226,7 +228,6 @@ struct _gc_runtime_state {
     /* linked lists of container objects */
 #ifndef Py_GIL_DISABLED
     struct gc_generation generations[NUM_GENERATIONS];
-    PyGC_Head *generation0;
 #else
     struct gc_generation young;
     struct gc_generation old[2];
@@ -244,6 +245,9 @@ struct _gc_runtime_state {
     /* a list of callbacks to be invoked when collection is performed */
     PyObject *callbacks;
 
+    /* The number of live objects currently tracked by the GC. */
+    Py_ssize_t heap_size;
+
     /* This is the number of objects that survived the last full
        collection. It approximates the number of long lived objects
        tracked by the GC.
@@ -269,6 +273,8 @@ struct _gc_runtime_state {
 
     /* Mutex held for gc_should_collect_mem_usage(). */
     PyMutex mutex;
+#else
+    PyGC_Head *generation0;
 #endif
 };
 
@@ -278,7 +284,8 @@ struct _gc_runtime_state {
         { .threshold = 2000, }, \
         { .threshold = 10, }, \
         { .threshold = 10, }, \
-    },
+    }, \
+    .heap_size = 0,
 #else
 #define GC_GENERATION_INIT \
     .young = { .threshold = 2000, }, \
index 88d265cbc21709d059a8b8a196b4c38026f7c9b6..3fc084ea6e9c6e9940347b2ba677a91cdb83fe5f 100644 (file)
@@ -1288,6 +1288,15 @@ class GCTests(unittest.TestCase):
         # Use n // 2 just in case some other objects were collected.
         self.assertTrue(new_count - count > (n // 2))
 
+    @requires_gil_enabled('need generational GC')
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_heap_size(self):
+        count = _testinternalcapi.get_tracked_heap_size()
+        l = []
+        self.assertEqual(count + 1, _testinternalcapi.get_tracked_heap_size())
+        del l
+        self.assertEqual(count, _testinternalcapi.get_tracked_heap_size())
+
 
 class GCCallbackTests(unittest.TestCase):
     def setUp(self):
index 59365ad45b32c9f5caa64db2481ddb69cf9fc4b5..bd75924397e76e8a3035cbc89e2e5759d231550a 100644 (file)
@@ -22,7 +22,7 @@ except ImportError:
 
 GC_STATS_FIELDS = (
     "gen", "iid", "ts_start", "ts_stop", "collections", "collected",
-    "uncollectable", "candidates", "duration")
+    "uncollectable", "candidates", "heap_size", "duration")
 
 
 def get_interpreter_identifiers(gc_stats) -> tuple[int,...]:
index 179a7b97dd4e2f5fa25830ef7037a5918adecfc5..1133db808efaec3439806eb72e29b0f35ecebea4 100644 (file)
@@ -601,6 +601,7 @@ PyDoc_STRVAR(_remote_debugging_GCMonitor_get_gc_stats__doc__,
 "        - collected: Total number of collected objects.\n"
 "        - uncollectable: Total number of uncollectable objects.\n"
 "        - candidates: Total objects considered and traversed.\n"
+"        - heap_size: number of live objects.\n"
 "        - duration: Total collection time, in seconds.\n"
 "\n"
 "Raises:\n"
@@ -1563,4 +1564,4 @@ skip_optional_kwonly:
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=1151e58683dab9f4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=36674f4cb8a653f3 input=a9049054013a1b77]*/
index 852dc866153192ca9619f9c0d87b70210faac204..d5d05edb8ecf5ee97e2330cffd82f7b8864f6fff 100644 (file)
@@ -53,6 +53,7 @@ read_gc_stats(struct gc_stats *stats, int64_t iid, PyObject *result,
             SET_FIELD(PyLong_FromSsize_t, items->collected);
             SET_FIELD(PyLong_FromSsize_t, items->uncollectable);
             SET_FIELD(PyLong_FromSsize_t, items->candidates);
+            SET_FIELD(PyLong_FromSsize_t, items->heap_size);
 
             SET_FIELD(PyFloat_FromDouble, items->duration);
 
index c840c59971c4786d5fa72c56ae16ab9768579836..c694e587e7cccbc42c116c9a39adb1f834ef2247 100644 (file)
@@ -143,6 +143,7 @@ static PyStructSequence_Field GCStatsInfo_fields[] = {
     {"collected", "Total number of collected objects"},
     {"uncollectable", "Total number of uncollectable objects"},
     {"candidates", "Total objects considered and traversed"},
+    {"heap_size", "Number of live objects"},
     {"duration", "Total collection time, in seconds"},
     {NULL}
 };
@@ -151,7 +152,7 @@ PyStructSequence_Desc GCStatsInfo_desc = {
     "_remote_debugging.GCStatsInfo",
     "Information about a garbage collector stats sample",
     GCStatsInfo_fields,
-    9
+    10
 };
 
 /* ============================================================================
@@ -1225,6 +1226,7 @@ Returns:
         - collected: Total number of collected objects.
         - uncollectable: Total number of uncollectable objects.
         - candidates: Total objects considered and traversed.
+        - heap_size: number of live objects.
         - duration: Total collection time, in seconds.
 
 Raises:
@@ -1235,7 +1237,7 @@ Raises:
 static PyObject *
 _remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self,
                                               int all_interpreters)
-/*[clinic end generated code: output=f73f365725224f7a input=09e647719c65f9e4]*/
+/*[clinic end generated code: output=f73f365725224f7a input=12f7c1a288cf2741]*/
 {
     RemoteDebuggingState *st = RemoteDebugging_GetStateFromType(Py_TYPE(self));
     return get_gc_stats(&self->offsets, all_interpreters, st->GCStatsInfo_Type);
index a07675bb66d8cc8dbfd596a15ff8340e9eb89d76..d85b9eb5f7da89c3ed8ecd4a08e6ec1b45d9c17c 100644 (file)
@@ -2731,8 +2731,7 @@ has_deferred_refcount(PyObject *self, PyObject *op)
 static PyObject *
 get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
-    // Generational GC doesn't track heap_size, return -1.
-    return PyLong_FromInt64(-1);
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size);
 }
 
 static PyObject *
index 134da107e1b61dc50aefc5d97c1fc75c7e4c9b6c..54ac1b089e503d051374d87e9b597d70aa699be6 100644 (file)
@@ -1405,13 +1405,13 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats)
     memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats));
 
     cur_stats->ts_start = stats->ts_start;
-
     cur_stats->collections += 1;
     cur_stats->collected += stats->collected;
     cur_stats->uncollectable += stats->uncollectable;
     cur_stats->candidates += stats->candidates;
 
     cur_stats->duration += stats->duration;
+    cur_stats->heap_size = stats->heap_size;
     /* Publish ts_stop last so remote readers do not select a partially
        updated stats record as the latest collection. */
     cur_stats->ts_stop = stats->ts_stop;
@@ -1471,6 +1471,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         invoke_gc_callback(tstate, "start", generation, &stats);
     }
 
+    stats.heap_size = gcstate->heap_size;
     // ignore error: don't interrupt the GC if reading the clock fails
     (void)PyTime_PerfCounterRaw(&stats.ts_start);
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
@@ -2097,6 +2098,8 @@ PyObject_GC_Del(void *op)
     PyGC_Head *g = AS_GC(op);
     if (_PyObject_GC_IS_TRACKED(op)) {
         gc_list_remove(g);
+        GCState *gcstate = get_gc_state();
+        gcstate->heap_size--;
 #ifdef Py_DEBUG
         PyObject *exc = PyErr_GetRaisedException();
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,