]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-123358: Use `_PyStackRef` in `LOAD_DEREF` (gh-130064)
authorSam Gross <colesbury@gmail.com>
Wed, 26 Mar 2025 16:08:20 +0000 (12:08 -0400)
committerGitHub <noreply@github.com>
Wed, 26 Mar 2025 16:08:20 +0000 (12:08 -0400)
Concurrent accesses from multiple threads to the same `cell` object did not
scale well in the free-threaded build. Use `_PyStackRef` and optimistically
avoid locking to improve scaling.

With the locks around cell reads gone, some of the free threading tests were
prone to starvation: the readers were able to run in a tight loop and the
writer threads weren't scheduled frequently enough to make timely progress.
Adjust the tests to avoid this.

Co-authored-by: Donghee Na <donghee.na@python.org>
12 files changed:
Include/internal/pycore_cell.h
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_free_threading/test_dict.py
Lib/test/test_free_threading/test_func_annotations.py
Lib/test/test_free_threading/test_gc.py
Lib/test/test_free_threading/test_list.py
Lib/test/test_free_threading/test_monitoring.py
Lib/test/test_free_threading/test_type.py
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h

index 27f67d57b2fb794296b4ea86e5497a7d5a6fdc63..cef01e80514f4b151b4535060ae5eeda3b12c1b5 100644 (file)
@@ -2,6 +2,8 @@
 #define Py_INTERNAL_CELL_H
 
 #include "pycore_critical_section.h"
+#include "pycore_object.h"
+#include "pycore_stackref.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -19,7 +21,7 @@ PyCell_SwapTakeRef(PyCellObject *cell, PyObject *value)
     PyObject *old_value;
     Py_BEGIN_CRITICAL_SECTION(cell);
     old_value = cell->ob_ref;
-    cell->ob_ref = value;
+    FT_ATOMIC_STORE_PTR_RELEASE(cell->ob_ref, value);
     Py_END_CRITICAL_SECTION();
     return old_value;
 }
@@ -37,11 +39,36 @@ PyCell_GetRef(PyCellObject *cell)
 {
     PyObject *res;
     Py_BEGIN_CRITICAL_SECTION(cell);
+#ifdef Py_GIL_DISABLED
+    res = _Py_XNewRefWithLock(cell->ob_ref);
+#else
     res = Py_XNewRef(cell->ob_ref);
+#endif
     Py_END_CRITICAL_SECTION();
     return res;
 }
 
+static inline _PyStackRef
+_PyCell_GetStackRef(PyCellObject *cell)
+{
+    PyObject *value;
+#ifdef Py_GIL_DISABLED
+    value = _Py_atomic_load_ptr(&cell->ob_ref);
+    if (value == NULL) {
+        return PyStackRef_NULL;
+    }
+    _PyStackRef ref;
+    if (_Py_TryIncrefCompareStackRef(&cell->ob_ref, value, &ref)) {
+        return ref;
+    }
+#endif
+    value = PyCell_GetRef(cell);
+    if (value == NULL) {
+        return PyStackRef_NULL;
+    }
+    return PyStackRef_FromPyObjectSteal(value);
+}
+
 #ifdef __cplusplus
 }
 #endif
index 096cd0b5e8db672878d55404cc991c0784220c96..9c10130259187288e33b49078cabf30991a7655d 100644 (file)
@@ -1195,7 +1195,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
     [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG },
     [LOAD_CONST_IMMORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG },
     [LOAD_CONST_MORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG },
-    [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
+    [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG },
     [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
     [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
index 7c2c8715fb486294c68071091c528c90f2c7a917..59572fca3753d6ef6b2fb7535ea52bf426102b77 100644 (file)
@@ -131,7 +131,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
     [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
     [_LOAD_FROM_DICT_OR_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
-    [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+    [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_STORE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG,
     [_COPY_FREE_VARS] = HAS_ARG_FLAG,
     [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG,
index 4f605e0c51f0d5f627f84ae6ac552ae74843c47f..476cc3178d843f6cee40a27aa279bce5cfa1b991 100644 (file)
@@ -74,6 +74,7 @@ class TestDict(TestCase):
             last = -1
             while True:
                 if CUR == last:
+                    time.sleep(0.001)
                     continue
                 elif CUR == OBJECT_COUNT:
                     break
index b3e92956072c7559e4085696f9434cfa96bc1f2c..0fde0136d77c7fa005c5aa6f386fffdd1d93b5d7 100644 (file)
@@ -27,13 +27,13 @@ def set_func_annotation(f, b):
 
 @unittest.skipUnless(Py_GIL_DISABLED, "Enable only in FT build")
 class TestFTFuncAnnotations(TestCase):
-    NUM_THREADS = 8
+    NUM_THREADS = 4
 
     def test_concurrent_read(self):
         def f(x: int) -> int:
             return x + 1
 
-        for _ in range(100):
+        for _ in range(10):
             with concurrent.futures.ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor:
                 b = Barrier(self.NUM_THREADS)
                 futures = {executor.submit(get_func_annotation, f, b): i for i in range(self.NUM_THREADS)}
@@ -54,7 +54,7 @@ class TestFTFuncAnnotations(TestCase):
         def bar(x: int, y: float) -> float:
             return y ** x
 
-        for _ in range(100):
+        for _ in range(10):
             with concurrent.futures.ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor:
                 b = Barrier(self.NUM_THREADS)
                 futures = {executor.submit(set_func_annotation, bar, b): i for i in range(self.NUM_THREADS)}
index 401067fe9c612c553e8d77cc5efda800bf56c6a4..3b83e0431efa6b66c76a3b285c33a11e8899d9bd 100644 (file)
@@ -35,24 +35,30 @@ class TestGC(TestCase):
             pass
 
     def test_get_referrers(self):
+        NUM_GC = 2
+        NUM_MUTATORS = 4
+
+        b = threading.Barrier(NUM_GC + NUM_MUTATORS)
         event = threading.Event()
 
         obj = MyObj()
 
         def gc_thread():
+            b.wait()
             for i in range(100):
                 o = gc.get_referrers(obj)
             event.set()
 
         def mutator_thread():
+            b.wait()
             while not event.is_set():
                 d1 = { "key": obj }
                 d2 = { "key": obj }
                 d3 = { "key": obj }
                 d4 = { "key": obj }
 
-        gcs = [Thread(target=gc_thread) for _ in range(2)]
-        mutators = [Thread(target=mutator_thread) for _ in range(4)]
+        gcs = [Thread(target=gc_thread) for _ in range(NUM_GC)]
+        mutators = [Thread(target=mutator_thread) for _ in range(NUM_MUTATORS)]
         with threading_helper.start_threads(gcs + mutators):
             pass
 
index 2c9af65273a1c8636210364d6066f1f53fdf2cb7..44c0ac74e02aa35f096736c9f4a073c5fce55c54 100644 (file)
@@ -20,11 +20,14 @@ class TestList(TestCase):
     def test_racing_iter_append(self):
         l = []
 
-        def writer_func():
+        barrier = Barrier(NTHREAD + 1)
+        def writer_func(l):
+            barrier.wait()
             for i in range(OBJECT_COUNT):
                 l.append(C(i + OBJECT_COUNT))
 
-        def reader_func():
+        def reader_func(l):
+            barrier.wait()
             while True:
                 count = len(l)
                 for i, x in enumerate(l):
@@ -32,10 +35,10 @@ class TestList(TestCase):
                 if count == OBJECT_COUNT:
                     break
 
-        writer = Thread(target=writer_func)
+        writer = Thread(target=writer_func, args=(l,))
         readers = []
         for x in range(NTHREAD):
-            reader = Thread(target=reader_func)
+            reader = Thread(target=reader_func, args=(l,))
             readers.append(reader)
             reader.start()
 
@@ -47,11 +50,14 @@ class TestList(TestCase):
     def test_racing_iter_extend(self):
         l = []
 
+        barrier = Barrier(NTHREAD + 1)
         def writer_func():
+            barrier.wait()
             for i in range(OBJECT_COUNT):
                 l.extend([C(i + OBJECT_COUNT)])
 
         def reader_func():
+            barrier.wait()
             while True:
                 count = len(l)
                 for i, x in enumerate(l):
index 8fec01715531cbe133d40badb923be2fd97a9a82..a480e398722c33fc4532337e586f94ead8ac8e53 100644 (file)
@@ -8,7 +8,7 @@ import weakref
 
 from sys import monitoring
 from test.support import threading_helper
-from threading import Thread, _PyRLock
+from threading import Thread, _PyRLock, Barrier
 from unittest import TestCase
 
 
@@ -194,7 +194,9 @@ class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase):
 
 @threading_helper.requires_working_threading()
 class MonitoringMisc(MonitoringTestMixin, TestCase):
-    def register_callback(self):
+    def register_callback(self, barrier):
+        barrier.wait()
+
         def callback(*args):
             pass
 
@@ -206,8 +208,9 @@ class MonitoringMisc(MonitoringTestMixin, TestCase):
     def test_register_callback(self):
         self.refs = []
         threads = []
-        for i in range(50):
-            t = Thread(target=self.register_callback)
+        barrier = Barrier(5)
+        for i in range(5):
+            t = Thread(target=self.register_callback, args=(barrier,))
             t.start()
             threads.append(t)
 
index 53f6d778bbecbc263f699a39702a936eb74caf99..ae996e7db3c7fd7fddded99311270e6393992157 100644 (file)
@@ -45,26 +45,20 @@ class TestType(TestCase):
         class C:
             x = 0
 
-        DONE = False
         def writer_func():
-            for i in range(3000):
+            for _ in range(3000):
                 C.x
                 C.x
                 C.x += 1
-            nonlocal DONE
-            DONE = True
 
         def reader_func():
-            while True:
+            for _ in range(3000):
                 # We should always see a greater value read from the type than the
                 # dictionary
                 a = C.__dict__['x']
                 b = C.x
                 self.assertGreaterEqual(b, a)
 
-                if DONE:
-                    break
-
         self.run_one(writer_func, reader_func)
 
     def test_attr_cache_consistency_subclass(self):
@@ -74,26 +68,20 @@ class TestType(TestCase):
         class D(C):
             pass
 
-        DONE = False
         def writer_func():
-            for i in range(3000):
+            for _ in range(3000):
                 D.x
                 D.x
                 C.x += 1
-            nonlocal DONE
-            DONE = True
 
         def reader_func():
-            while True:
+            for _ in range(3000):
                 # We should always see a greater value read from the type than the
                 # dictionary
                 a = C.__dict__['x']
                 b = D.x
                 self.assertGreaterEqual(b, a)
 
-                if DONE:
-                    break
-
         self.run_one(writer_func, reader_func)
 
     def test___class___modification(self):
@@ -140,10 +128,18 @@ class TestType(TestCase):
 
 
     def run_one(self, writer_func, reader_func):
-        writer = Thread(target=writer_func)
+        barrier = threading.Barrier(NTHREADS)
+
+        def wrap_target(target):
+            def wrapper():
+                barrier.wait()
+                target()
+            return wrapper
+
+        writer = Thread(target=wrap_target(writer_func))
         readers = []
-        for x in range(30):
-            reader = Thread(target=reader_func)
+        for x in range(NTHREADS - 1):
+            reader = Thread(target=wrap_target(reader_func))
             readers.append(reader)
             reader.start()
 
index 44e8ea2450800c4af38f0338e9e4f32afb706dc2..b6a482183b1d0bcbb99e7fb03a7ff537a25cbac8 100644 (file)
@@ -1822,12 +1822,11 @@ dummy_func(
 
         inst(LOAD_DEREF, ( -- value)) {
             PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg));
-            PyObject *value_o = PyCell_GetRef(cell);
-            if (value_o == NULL) {
+            value = _PyCell_GetStackRef(cell);
+            if (PyStackRef_IsNull(value)) {
                 _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
                 ERROR_IF(true, error);
             }
-            value = PyStackRef_FromPyObjectSteal(value_o);
         }
 
         inst(STORE_DEREF, (v --)) {
index 43f9a81d2c70f8ff8b000b634eb9ee0e412774a4..6d5f3dd1f6b93502ca75d03c40f099e1bdae8bd8 100644 (file)
             _PyStackRef value;
             oparg = CURRENT_OPARG();
             PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg));
-            PyObject *value_o = PyCell_GetRef(cell);
-            if (value_o == NULL) {
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            value = _PyCell_GetStackRef(cell);
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            if (PyStackRef_IsNull(value)) {
+                stack_pointer[0] = value;
+                stack_pointer += 1;
+                assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 JUMP_TO_ERROR();
             }
-            value = PyStackRef_FromPyObjectSteal(value_o);
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
index 493d99db2d36776979c25e0b09bb3ef88907e3cc..f6a538e98fe1291943b1b9cd273f9fe345e158e8 100644 (file)
             INSTRUCTION_STATS(LOAD_DEREF);
             _PyStackRef value;
             PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg));
-            PyObject *value_o = PyCell_GetRef(cell);
-            if (value_o == NULL) {
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            value = _PyCell_GetStackRef(cell);
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            if (PyStackRef_IsNull(value)) {
+                stack_pointer[0] = value;
+                stack_pointer += 1;
+                assert(WITHIN_STACK_BOUNDS());
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 JUMP_TO_LABEL(error);
             }
-            value = PyStackRef_FromPyObjectSteal(value_o);
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());