gh-112529: Make the GC scheduling thread-safe (#114880)
author Sam Gross <colesbury@gmail.com>
Fri, 16 Feb 2024 16:22:27 +0000 (11:22 -0500)
committer GitHub <noreply@github.com>
Fri, 16 Feb 2024 16:22:27 +0000 (11:22 -0500)
The GC keeps track of the number of allocations (less deallocations)
since the last GC. This change buffers the count in thread-local state
and uses atomic operations to update the per-interpreter count. The
thread-local buffering avoids contention on shared state.

A consequence is that GC scheduling is less precise, so
"test_sneaky_frame_object" is skipped because it requires that the GC
run immediately after a frame object is allocated.
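The underlying pattern, independent of the CPython internals shown in the
diff below, is a per-thread buffered counter that is flushed into a shared
atomic counter only when a threshold is crossed. The following is a minimal
standalone sketch of that idea in C11; the names, the threshold value of
512, and the use of <stdatomic.h> are illustrative assumptions, not the
CPython API:

#include <stdatomic.h>

// Shared (per-interpreter style) allocation counter, updated atomically.
static atomic_long global_alloc_count;

// Per-thread buffered delta; flushed only when it reaches the threshold,
// so most allocations touch thread-local memory exclusively.
static _Thread_local long local_alloc_count;

#define LOCAL_COUNT_THRESHOLD 512  /* illustrative; mirrors the 512 in the diff */

static void count_allocation(void)
{
    if (++local_alloc_count >= LOCAL_COUNT_THRESHOLD) {
        atomic_fetch_add_explicit(&global_alloc_count, local_alloc_count,
                                  memory_order_relaxed);
        local_alloc_count = 0;
        // A real collector would now check its thresholds and, if exceeded,
        // schedule a collection (as record_allocation() does in the diff).
    }
}

static void count_deallocation(void)
{
    if (--local_alloc_count <= -LOCAL_COUNT_THRESHOLD) {
        atomic_fetch_add_explicit(&global_alloc_count, local_alloc_count,
                                  memory_order_relaxed);
        local_alloc_count = 0;
    }
}

Under this scheme the shared counter can lag each thread's true count by up
to the threshold, which is why tests that depend on precise collection
timing are skipped in the free-threaded build.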

Include/internal/pycore_gc.h
Include/internal/pycore_tstate.h
Lib/test/test_frame.py
Lib/test/test_gc.py
Modules/gcmodule.c
Objects/typeobject.c
Python/gc_free_threading.c

diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 582a16bf5218cef3be3b6c29af898b578ac8a4d7..a98864f74313987dc5d9a9edb20308b41054fe4c 100644
@@ -260,6 +260,13 @@ struct _gc_runtime_state {
     Py_ssize_t long_lived_pending;
 };
 
+#ifdef Py_GIL_DISABLED
+struct _gc_thread_state {
+    /* Thread-local allocation count. */
+    Py_ssize_t alloc_count;
+};
+#endif
+
 
 extern void _PyGC_InitState(struct _gc_runtime_state *);
 
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index 97aa85a659fa7b421de38c9c5a27f63083dfb0f6..7fb9ab2056704e10843f79cc42fb90ae4978aeb4 100644
@@ -28,6 +28,7 @@ typedef struct _PyThreadStateImpl {
     PyThreadState base;
 
 #ifdef Py_GIL_DISABLED
+    struct _gc_thread_state gc;
     struct _mimalloc_thread_state mimalloc;
     struct _Py_object_freelists freelists;
     struct _brc_thread_state brc;
diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py
index baed03d92b9e56181bf9712f49f7a969427fd44d..f88206de550da08acc07403b3852f05c9f66830c 100644
@@ -13,7 +13,7 @@ except ImportError:
     _testcapi = None
 
 from test import support
-from test.support import threading_helper
+from test.support import threading_helper, Py_GIL_DISABLED
 from test.support.script_helper import assert_python_ok
 
 
@@ -294,6 +294,7 @@ class TestIncompleteFrameAreInvisible(unittest.TestCase):
         assert_python_ok("-c", code)
 
     @support.cpython_only
+    @unittest.skipIf(Py_GIL_DISABLED, "test requires precise GC scheduling")
     def test_sneaky_frame_object(self):
 
         def trace(frame, event, arg):
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index b01f344cb14a1a77cace0c99692aa74c193ac7ec..dd09643788d62fbb5407e169a9d4061c509d3b4b 100644
@@ -363,6 +363,7 @@ class GCTests(unittest.TestCase):
     # To minimize variations, though, we first store the get_count() results
     # and check them at the end.
     @refcount_test
+    @unittest.skipIf(Py_GIL_DISABLED, 'needs precise allocation counts')
     def test_get_count(self):
         gc.collect()
         a, b, c = gc.get_count()
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index a2b66b9b78c169d442da8b7fdcc564df9281def5..961165e16a0fee9c5632db8c51a111d0461a279b 100644
@@ -201,6 +201,16 @@ gc_get_count_impl(PyObject *module)
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
     GCState *gcstate = get_gc_state();
+
+#ifdef Py_GIL_DISABLED
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct _gc_thread_state *gc = &tstate->gc;
+
+    // Flush the local allocation count to the global count
+    _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+    gc->alloc_count = 0;
+#endif
+
     return Py_BuildValue("(iii)",
                          gcstate->generations[0].count,
                          gcstate->generations[1].count,
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0118ee255ef0177c25ee9519a0f4bb27255c8111..2e25c207c643829354c160a5abc06cf386afed90 100644
@@ -1835,6 +1835,8 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems)
     if (presize) {
         ((PyObject **)alloc)[0] = NULL;
         ((PyObject **)alloc)[1] = NULL;
+    }
+    if (PyType_IS_GC(type)) {
         _PyObject_GC_Link(obj);
     }
     memset(obj, '\0', size);
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 3dc1dc19182eb41797c982e422373e92fee72fa1..a758c99285a53909485f17b55c5cc8a0e3fd001f 100644
@@ -23,6 +23,11 @@ typedef struct _gc_runtime_state GCState;
 #  define GC_DEBUG
 #endif
 
+// Each thread buffers the count of allocated objects in a thread-local
+// variable up to +/- this amount to reduce the overhead of updating
+// the global count.
+#define LOCAL_ALLOC_COUNT_THRESHOLD 512
+
 // Automatically choose the generation that needs collecting.
 #define GENERATION_AUTO (-1)
 
@@ -959,6 +964,41 @@ gc_should_collect(GCState *gcstate)
             gcstate->generations[1].threshold == 0);
 }
 
+static void
+record_allocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    // We buffer the allocation count to avoid the overhead of atomic
+    // operations for every allocation.
+    gc->alloc_count++;
+    if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
+        // TODO: Use Py_ssize_t for the generation count.
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+
+        if (gc_should_collect(gcstate) &&
+            !_Py_atomic_load_int_relaxed(&gcstate->collecting))
+        {
+            _Py_ScheduleGC(tstate->interp);
+        }
+    }
+}
+
+static void
+record_deallocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    gc->alloc_count--;
+    if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+    }
+}
+
 static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
@@ -981,6 +1021,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
         }
     }
 
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
+
     // Clear weakrefs and enqueue callbacks (but do not call them).
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
@@ -1090,7 +1133,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     m = state.collected;
     n = state.uncollectable;
-    gcstate->long_lived_total = state.long_lived_total;
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1530,15 +1572,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
 void
 _PyObject_GC_Link(PyObject *op)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-    GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++;
-
-    if (gc_should_collect(gcstate) &&
-        !_Py_atomic_load_int_relaxed(&gcstate->collecting))
-    {
-        _Py_ScheduleGC(tstate->interp);
-    }
+    record_allocation(_PyThreadState_GET());
 }
 
 void
@@ -1564,7 +1598,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
         ((PyObject **)mem)[1] = NULL;
     }
     PyObject *op = (PyObject *)(mem + presize);
-    _PyObject_GC_Link(op);
+    record_allocation(tstate);
     return op;
 }
 
@@ -1646,10 +1680,9 @@ PyObject_GC_Del(void *op)
         PyErr_SetRaisedException(exc);
 #endif
     }
-    GCState *gcstate = get_gc_state();
-    if (gcstate->generations[0].count > 0) {
-        gcstate->generations[0].count--;
-    }
+
+    record_deallocation(_PyThreadState_GET());
+
     PyObject_Free(((char *)op)-presize);
 }