gh-142531: Fix free-threaded GC performance regression (gh-142562)

author Neil Schemenauer <nas-github@arctrix.com>

Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)

committer GitHub <noreply@github.com>

Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)
author Neil Schemenauer <nas-github@arctrix.com>
Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)
committer GitHub <noreply@github.com>
Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py

index ec5df4d20e7085a3cf22e49d6c05b6f0566cc746..6aa6361d5d0b9243afb485863861c3505d51be7d 100644 (file)
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1231,6 +1231,24 @@ class GCTests(unittest.TestCase):
          assert_python_ok("-c", code_inside_function)
  
  
+    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_tuple_untrack_counts(self):
+        # This ensures that the free-threaded GC is counting untracked tuples
+        # in the "long_lived_total" count.  This is required to avoid
+        # performance issues from running the GC too frequently.  See
+        # GH-142531 as an example.
+        gc.collect()
+        count = _testinternalcapi.get_long_lived_total()
+        n = 20_000
+        tuples = [(x,) for x in range(n)]
+        gc.collect()
+        new_count = _testinternalcapi.get_long_lived_total()
+        self.assertFalse(gc.is_tracked(tuples[0]))
+        # Use n // 2 just in case some other objects were collected.
+        self.assertTrue(new_count - count > (n // 2))
+
+
  class IncrementalGCTests(unittest.TestCase):
      @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
      @requires_gil_enabled("Free threading does not support incremental GC")
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst

new file mode 100644 (file)

index 0000000..15e03c1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst
@@ -0,0 +1,5 @@
+Fix a free-threaded GC performance regression.  Previously, if there were
+many untracked tuples, the GC would run too often, resulting in poor
+performance.  The fix is to include untracked tuples in the "long lived"
+object count.  The number of frozen objects is now also included, since the
+free-threaded GC must scan those too.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c

index 89e558b0fe89336f43774b6e68fab68aae550c34..4140cd23ded95e6adc0fc65d7da6f5ec931b4d9c 100644 (file)
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
      }
      return PyLong_FromVoidPtr(bc);
  }
+
+static PyObject *
+get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
+}
+
  #endif
  
  static PyObject *
@@ -2590,6 +2597,7 @@ static PyMethodDef module_functions[] = {
      {"py_thread_id", get_py_thread_id, METH_NOARGS},
      {"get_tlbc", get_tlbc, METH_O, NULL},
      {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
+    {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
  #endif
  #ifdef _Py_TIER2
      {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c

index e672e870db2f27e10fdfa2c73a40b4ff973f65f1..7ba94d5381b72e17a620c88d71292c614de722d6 100644 (file)
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -375,6 +375,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
      return op;
  }
  
+// Like op_from_block(), but also returns untracked and frozen objects.
+static PyObject *
+op_from_block_all_gc(void *block, void *arg)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char*)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    return op;
+}
+
  static int
  gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
                           struct visitor_args *arg)
@@ -1186,12 +1199,20 @@ static bool
  scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
                    void *block, size_t block_size, void *args)
  {
-    PyObject *op = op_from_block(block, args, false);
+    PyObject *op = op_from_block_all_gc(block, args);
      if (op == NULL) {
          return true;
      }
-
      struct collection_state *state = (struct collection_state *)args;
+    // The free-threaded GC cost is proportional to the number of objects in
+    // the mimalloc GC heap and so we should include the counts for untracked
+    // and frozen objects as well.  This is especially important if many
+    // tuples have been untracked.
+    state->long_lived_total++;
+    if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
+        return true;
+    }
+
      if (gc_is_unreachable(op)) {
          // Disable deferred refcounting for unreachable objects so that they
          // are collected immediately after finalization.
@@ -1209,6 +1230,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
          else {
              worklist_push(&state->unreachable, op);
          }
+        // It is possible this object will be resurrected but
+        // for now we assume it will be deallocated.
+        state->long_lived_total--;
          return true;
      }
  
@@ -1222,7 +1246,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
      // object is reachable, restore `ob_tid`; we're done with these objects
      gc_restore_tid(op);
      gc_clear_alive(op);
-    state->long_lived_total++;
      return true;
  }
  
@@ -1891,6 +1914,7 @@ handle_resurrected_objects(struct collection_state *state)
                  _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
                  worklist_remove(&iter);
                  merge_refcount(op, -1);  // remove worklist reference
+                state->long_lived_total++;
              }
          }
      }
@@ -2303,9 +2327,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
          }
      }
  
-    // Record the number of live GC objects
-    interp->gc.long_lived_total = state->long_lived_total;
-
      // Find weakref callbacks we will honor (but do not call them).
      find_weakref_callbacks(state);
      _PyEval_StartTheWorld(interp);
@@ -2326,8 +2347,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
      if (err == 0) {
          clear_weakrefs(state);
      }
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
      _PyEval_StartTheWorld(interp);
  
+
      if (err < 0) {
          cleanup_worklist(&state->unreachable);
          cleanup_worklist(&state->legacy_finalizers);
author	Neil Schemenauer <nas-github@arctrix.com>
	Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)
committer	GitHub <noreply@github.com>
	Thu, 11 Dec 2025 20:30:56 +0000 (12:30 -0800)
Lib/test/test_gc.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-23-03-10.gh-issue-142531.NUEa1T.rst	[new file with mode: 0644]	patch \| blob
Modules/_testinternalcapi.c		patch \| blob \| blame \| history
Python/gc_free_threading.c		patch \| blob \| blame \| history