[3.14] GH-139951: Fix major GC performance regression. Backport of GH-140262 (GH...
author    Mark Shannon <mark@hotpy.org>
Thu, 23 Oct 2025 14:26:58 +0000 (15:26 +0100)
committer GitHub <noreply@github.com>
Thu, 23 Oct 2025 14:26:58 +0000 (15:26 +0100)
* Count the number of actually tracked objects, instead of trackable objects. This ensures that untracking tuples has the desired effect of reducing GC overhead.
* Do not track most untrackable tuples during creation. This prevents large numbers of small tuples from causing excessive GCs.
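
A quick way to observe the second point from Python (a hedged sketch, assuming a CPython build that includes this backport; tracking behaviour can differ for other tuple construction paths):

    import gc

    # Built at runtime from objects that can never take part in a reference
    # cycle (ints, str), so with this change the tuple is left untracked at
    # creation instead of being untracked lazily by a later collection.
    atoms = tuple([1, 2, "three"])

    # Holds containers that could form a cycle, so it is still tracked.
    container = tuple([[1], [2]])

    print(gc.is_tracked(atoms))      # expected: False with this fix applied
    print(gc.is_tracked(container))  # expected: True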

Include/internal/pycore_gc.h
Lib/test/test_gc.py
Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst [new file with mode: 0644]
Objects/tupleobject.c
Python/gc.c

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index a6519aa086309de688a0af111e311738e1ffd471..fd284d0e4ecc2f5a03004caa04afbc722419db06 100644 (file)
@@ -205,6 +205,12 @@ static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
 #endif
 }
 
+extern void _Py_ScheduleGC(PyThreadState *tstate);
+
+#ifndef Py_GIL_DISABLED
+extern void _Py_TriggerGC(struct _gc_runtime_state *gcstate);
+#endif
+
 
 /* Tell the GC to track this object.
  *
@@ -238,14 +244,19 @@ static inline void _PyObject_GC_TRACK(
                           "object is in generation which is garbage collected",
                           filename, lineno, __func__);
 
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    PyGC_Head *generation0 = &interp->gc.young.head;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    PyGC_Head *generation0 = &gcstate->young.head;
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
-    uintptr_t not_visited = 1 ^ interp->gc.visited_space;
+    uintptr_t not_visited = 1 ^ gcstate->visited_space;
     gc->_gc_next = ((uintptr_t)generation0) | not_visited;
     generation0->_gc_prev = (uintptr_t)gc;
+    gcstate->young.count++; /* number of tracked GC objects */
+    gcstate->heap_size++;
+    if (gcstate->young.count > gcstate->young.threshold) {
+        _Py_TriggerGC(gcstate);
+    }
 #endif
 }
 
@@ -280,6 +291,11 @@ static inline void _PyObject_GC_UNTRACK(
     _PyGCHead_SET_PREV(next, prev);
     gc->_gc_next = 0;
     gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+    struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+    if (gcstate->young.count > 0) {
+        gcstate->young.count--;
+    }
+    gcstate->heap_size--;
 #endif
 }
 
@@ -343,7 +359,6 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);
 
 // Functions to clear types free lists
 extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
-extern void _Py_ScheduleGC(PyThreadState *tstate);
 extern void _Py_RunGC(PyThreadState *tstate);
 
 union _PyStackRef;
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 71a4f035fbb02a51f1feb716712e5e3e3ec81bf6..a0bdf0955041fb17b9785d956fe089bf463626a9 100644 (file)
@@ -1329,6 +1329,7 @@ class GCTogglingTests(unittest.TestCase):
     def tearDown(self):
         gc.disable()
 
+    @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments")
     def test_bug1055820c(self):
         # Corresponds to temp2c.py in the bug report.  This is pretty
         # elaborate.
@@ -1390,10 +1391,11 @@ class GCTogglingTests(unittest.TestCase):
             # The free-threaded build doesn't have multiple generations, so
             # just trigger a GC manually.
             gc.collect()
+        assert not detector.gc_happened
         while not detector.gc_happened:
             i += 1
-            if i > 10000:
-                self.fail("gc didn't happen after 10000 iterations")
+            if i > 100000:
+                self.fail("gc didn't happen after 100000 iterations")
             self.assertEqual(len(ouch), 0)
             junk.append([])  # this will eventually trigger gc
 
@@ -1464,8 +1466,8 @@ class GCTogglingTests(unittest.TestCase):
             gc.collect()
         while not detector.gc_happened:
             i += 1
-            if i > 10000:
-                self.fail("gc didn't happen after 10000 iterations")
+            if i > 50000:
+                self.fail("gc didn't happen after 50000 iterations")
             self.assertEqual(len(ouch), 0)
             junk.append([])  # this will eventually trigger gc
 
@@ -1482,8 +1484,8 @@ class GCTogglingTests(unittest.TestCase):
         detector = GC_Detector()
         while not detector.gc_happened:
             i += 1
-            if i > 10000:
-                self.fail("gc didn't happen after 10000 iterations")
+            if i > 100000:
+                self.fail("gc didn't happen after 100000 iterations")
             junk.append([])  # this will eventually trigger gc
 
         try:
@@ -1493,11 +1495,11 @@ class GCTogglingTests(unittest.TestCase):
             detector = GC_Detector()
             while not detector.gc_happened:
                 i += 1
-                if i > 10000:
+                if i > 100000:
                     break
                 junk.append([])  # this may eventually trigger gc (if it is enabled)
 
-            self.assertEqual(i, 10001)
+            self.assertEqual(i, 100001)
         finally:
             gc.enable()
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
new file mode 100644 (file)
index 0000000..e039961
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
@@ -0,0 +1,7 @@
+Fixes a regression in GC performance for a growing heap composed mostly of
+small tuples.
+
+*  Counts the number of actually tracked objects, instead of trackable objects.
+   This ensures that untracking tuples has the desired effect of reducing GC overhead.
+*  Does not track most untrackable tuples during creation.
+   This prevents large numbers of small tuples from causing excessive GCs.
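
The regression scenario described above (a growing heap made up mostly of small tuples) can be reproduced roughly from Python by counting collector runs while such a heap grows. This is an illustrative sketch, not the upstream benchmark; the workload and the use of gc.callbacks here are assumptions made for the example:

    import gc

    collections = 0

    def on_gc(phase, info):
        # gc.callbacks invokes each callback with phase "start"/"stop"
        # around every collection, whether automatic or explicit.
        global collections
        if phase == "start":
            collections += 1

    gc.callbacks.append(on_gc)

    # Grow a heap consisting mostly of small tuples of atomic values. Before
    # this fix, each such tuple counted towards the young-generation threshold
    # even though it could never be part of a cycle, so the collector ran far
    # more often than necessary as the heap grew.
    heap = [(i, i + 1, "x") for i in range(1_000_000)]

    gc.callbacks.remove(on_gc)
    print(f"collections while building: {collections}")
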
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 38a130d889d0c18cdee9f7964844392c4c7153a9..c950d0da70fb867f9c57ee26c80c725bce9234e4 100644 (file)
@@ -156,6 +156,18 @@ _PyTuple_MaybeUntrack(PyObject *op)
     _PyObject_GC_UNTRACK(op);
 }
 
+/* Fast, but conservative check whether an object may be tracked.
+   May return true for an object that is not tracked;
+   will always return true for an object that is tracked.
+   This is a temporary workaround until _PyObject_GC_IS_TRACKED
+   becomes fast and safe to call on non-GC objects.
+*/
+static bool
+maybe_tracked(PyObject *ob)
+{
+    return _PyType_IS_GC(Py_TYPE(ob));
+}
+
 PyObject *
 PyTuple_Pack(Py_ssize_t n, ...)
 {
@@ -163,6 +175,7 @@ PyTuple_Pack(Py_ssize_t n, ...)
     PyObject *o;
     PyObject **items;
     va_list vargs;
+    bool track = false;
 
     if (n == 0) {
         return tuple_get_empty();
@@ -177,10 +190,15 @@ PyTuple_Pack(Py_ssize_t n, ...)
     items = result->ob_item;
     for (i = 0; i < n; i++) {
         o = va_arg(vargs, PyObject *);
+        if (!track && maybe_tracked(o)) {
+            track = true;
+        }
         items[i] = Py_NewRef(o);
     }
     va_end(vargs);
-    _PyObject_GC_TRACK(result);
+    if (track) {
+        _PyObject_GC_TRACK(result);
+    }
     return (PyObject *)result;
 }
 
@@ -377,11 +395,17 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
         return NULL;
     }
     PyObject **dst = tuple->ob_item;
+    bool track = false;
     for (Py_ssize_t i = 0; i < n; i++) {
         PyObject *item = src[i];
+        if (!track && maybe_tracked(item)) {
+            track = true;
+        }
         dst[i] = Py_NewRef(item);
     }
-    _PyObject_GC_TRACK(tuple);
+    if (track) {
+        _PyObject_GC_TRACK(tuple);
+    }
     return (PyObject *)tuple;
 }
 
@@ -396,10 +420,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef *src, Py_ssize_t n)
         return NULL;
     }
     PyObject **dst = tuple->ob_item;
+    bool track = false;
     for (Py_ssize_t i = 0; i < n; i++) {
-        dst[i] = PyStackRef_AsPyObjectSteal(src[i]);
+        PyObject *item = PyStackRef_AsPyObjectSteal(src[i]);
+        if (!track && maybe_tracked(item)) {
+            track = true;
+        }
+        dst[i] = item;
+    }
+    if (track) {
+        _PyObject_GC_TRACK(tuple);
     }
-    _PyObject_GC_TRACK(tuple);
     return (PyObject *)tuple;
 }
 
diff --git a/Python/gc.c b/Python/gc.c
index 41854265361b2454d3abed0cf23a1dfd4e4e5369..55546cd00993545bae3a415e4fadea910a08951e 100644 (file)
@@ -1590,7 +1590,7 @@ assess_work_to_do(GCState *gcstate)
         scale_factor = 2;
     }
     intptr_t new_objects = gcstate->young.count;
-    intptr_t max_heap_fraction = new_objects*3/2;
+    intptr_t max_heap_fraction = new_objects*2;
     intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     if (heap_fraction > max_heap_fraction) {
         heap_fraction = max_heap_fraction;
@@ -1605,6 +1605,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     GC_STAT_ADD(1, collections, 1);
     GCState *gcstate = &tstate->interp->gc;
     gcstate->work_to_do += assess_work_to_do(gcstate);
+    if (gcstate->work_to_do < 0) {
+        return;
+    }
     untrack_tuples(&gcstate->young.head);
     if (gcstate->phase == GC_PHASE_MARK) {
         Py_ssize_t objects_marked = mark_at_start(tstate);
@@ -1647,7 +1650,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     gc_collect_region(tstate, &increment, &survivors, stats);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     gcstate->work_to_do -= increment_size;
 
     add_stats(gcstate, 1, stats);
@@ -2231,21 +2233,11 @@ _Py_ScheduleGC(PyThreadState *tstate)
 }
 
 void
-_PyObject_GC_Link(PyObject *op)
+_Py_TriggerGC(struct _gc_runtime_state *gcstate)
 {
-    PyGC_Head *gc = AS_GC(op);
-    // gc must be correctly aligned
-    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
-
     PyThreadState *tstate = _PyThreadState_GET();
-    GCState *gcstate = &tstate->interp->gc;
-    gc->_gc_next = 0;
-    gc->_gc_prev = 0;
-    gcstate->young.count++; /* number of allocated GC objects */
-    gcstate->heap_size++;
-    if (gcstate->young.count > gcstate->young.threshold &&
-        gcstate->enabled &&
-        gcstate->young.threshold &&
+    if (gcstate->enabled &&
+        gcstate->young.threshold != 0 &&
         !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
         !_PyErr_Occurred(tstate))
     {
@@ -2253,6 +2245,17 @@ _PyObject_GC_Link(PyObject *op)
     }
 }
 
+void
+_PyObject_GC_Link(PyObject *op)
+{
+    PyGC_Head *gc = AS_GC(op);
+    // gc must be correctly aligned
+    _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
+    gc->_gc_next = 0;
+    gc->_gc_prev = 0;
+
+}
+
 void
 _Py_RunGC(PyThreadState *tstate)
 {
@@ -2359,6 +2362,11 @@ PyObject_GC_Del(void *op)
     PyGC_Head *g = AS_GC(op);
     if (_PyObject_GC_IS_TRACKED(op)) {
         gc_list_remove(g);
+        GCState *gcstate = get_gc_state();
+        if (gcstate->young.count > 0) {
+            gcstate->young.count--;
+        }
+        gcstate->heap_size--;
 #ifdef Py_DEBUG
         PyObject *exc = PyErr_GetRaisedException();
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
@@ -2372,11 +2380,6 @@ PyObject_GC_Del(void *op)
         PyErr_SetRaisedException(exc);
 #endif
     }
-    GCState *gcstate = get_gc_state();
-    if (gcstate->young.count > 0) {
-        gcstate->young.count--;
-    }
-    gcstate->heap_size--;
     PyObject_Free(((char *)op)-presize);
 }