[3.14] gh-142531: Fix free-threaded GC performance regression (gh-142562) (gh-142617)

author Neil Schemenauer <nas-github@arctrix.com>

Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)

committer GitHub <noreply@github.com>

Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)
author Neil Schemenauer <nas-github@arctrix.com>
Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)
committer GitHub <noreply@github.com>
Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py

index a6adc2c98514bad0fa19b561ee3b55c207e6b3ce..3e3092dcae11198d9533790ad74f57471ac555b8 100644 (file)
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1157,6 +1157,24 @@ class GCTests(unittest.TestCase):
          assert_python_ok("-c", source)
  
  
+    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_tuple_untrack_counts(self):
+        # This ensures that the free-threaded GC is counting untracked tuples
+        # in the "long_lived_total" count.  This is required to avoid
+        # performance issues from running the GC too frequently.  See
+        # GH-142531 as an example.
+        gc.collect()
+        count = _testinternalcapi.get_long_lived_total()
+        n = 20_000
+        tuples = [(x,) for x in range(n)]
+        gc.collect()
+        new_count = _testinternalcapi.get_long_lived_total()
+        self.assertFalse(gc.is_tracked(tuples[0]))
+        # Use n // 2 just in case some other objects were collected.
+        self.assertTrue(new_count - count > (n // 2))
+
+
  class IncrementalGCTests(unittest.TestCase):
      @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
      @requires_gil_enabled("Free threading does not support incremental GC")
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst

new file mode 100644 (file)

index 0000000..15e03c1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
@@ -0,0 +1,5 @@
+Fix a free-threaded GC performance regression.  Previously, if there were
+many untracked tuples, the GC would run too often, resulting in poor
+performance.  The fix is to include untracked tuples in the "long lived"
+object count.  The number of frozen objects is also now included since the
+free-threaded GC must scan those too.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c

index ce11a81211e7e6f3b48c6ab4756a1156185588e4..27f2d70e832c0f870586070e65a64b66775ccfd3 100644 (file)
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
      }
      return PyLong_FromVoidPtr(bc);
  }
+
+static PyObject *
+get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
+}
+
  #endif
  
  static PyObject *
@@ -2552,6 +2559,7 @@ static PyMethodDef module_functions[] = {
      {"py_thread_id", get_py_thread_id, METH_NOARGS},
      {"get_tlbc", get_tlbc, METH_O, NULL},
      {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
+    {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
  #endif
  #ifdef _Py_TIER2
      {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c

index d096accb4371c1e49a98d7af9bf688c6083b4538..079b6b78dcd96ff5c513206dc173dda78b125ea9 100644 (file)
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -374,6 +374,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
      return op;
  }
  
+// Like op_from_block(), but returns untracked and frozen objects as well.
+static PyObject *
+op_from_block_all_gc(void *block, void *arg)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char*)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    return op;
+}
+
  static int
  gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
                           struct visitor_args *arg)
@@ -1175,12 +1188,20 @@ static bool
  scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
                    void *block, size_t block_size, void *args)
  {
-    PyObject *op = op_from_block(block, args, false);
+    PyObject *op = op_from_block_all_gc(block, args);
      if (op == NULL) {
          return true;
      }
-
      struct collection_state *state = (struct collection_state *)args;
+    // The free-threaded GC cost is proportional to the number of objects in
+    // the mimalloc GC heap and so we should include the counts for untracked
+    // and frozen objects as well.  This is especially important if many
+    // tuples have been untracked.
+    state->long_lived_total++;
+    if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
+        return true;
+    }
+
      if (gc_is_unreachable(op)) {
          // Disable deferred refcounting for unreachable objects so that they
          // are collected immediately after finalization.
@@ -1198,6 +1219,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
          else {
              worklist_push(&state->unreachable, op);
          }
+        // It is possible this object will be resurrected but
+        // for now we assume it will be deallocated.
+        state->long_lived_total--;
          return true;
      }
  
@@ -1211,7 +1235,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
      // object is reachable, restore `ob_tid`; we're done with these objects
      gc_restore_tid(op);
      gc_clear_alive(op);
-    state->long_lived_total++;
      return true;
  }
  
@@ -1818,6 +1841,7 @@ handle_resurrected_objects(struct collection_state *state)
                  _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
                  worklist_remove(&iter);
                  merge_refcount(op, -1);  // remove worklist reference
+                state->long_lived_total++;
              }
          }
      }
@@ -2220,9 +2244,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
          }
      }
  
-    // Record the number of live GC objects
-    interp->gc.long_lived_total = state->long_lived_total;
-
      // Clear weakrefs and enqueue callbacks (but do not call them).
      clear_weakrefs(state);
      _PyEval_StartTheWorld(interp);
@@ -2240,6 +2261,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
      err = handle_resurrected_objects(state);
      // Clear free lists in all threads
      _PyGC_ClearAllFreeLists(interp);
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
      _PyEval_StartTheWorld(interp);
  
      if (err < 0) {
author	Neil Schemenauer <nas-github@arctrix.com>
	Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)
committer	GitHub <noreply@github.com>
	Fri, 12 Dec 2025 22:30:20 +0000 (14:30 -0800)
Lib/test/test_gc.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst	[new file with mode: 0644]	patch \| blob
Modules/_testinternalcapi.c		patch \| blob \| blame \| history
Python/gc_free_threading.c		patch \| blob \| blame \| history