gh-116818: Make `sys.settrace`, `sys.setprofile`, and monitoring thread-safe (#116775)
author     Dino Viehland <dinoviehland@meta.com>
           Fri, 19 Apr 2024 21:47:42 +0000 (14:47 -0700)
committer  GitHub <noreply@github.com>
           Fri, 19 Apr 2024 21:47:42 +0000 (14:47 -0700)
Makes sys.settrace, sys.setprofile, and monitoring generally thread-safe.

Mostly uses a stop-the-world approach plus synchronization around the code object's _co_instrumentation_version. A little extra synchronization around the monitoring data may still be required to keep TSAN clean.
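
In outline, the handshake works as follows. This is a simplified sketch, not code from the patch: the two helper functions are hypothetical, while FT_ATOMIC_STORE_UINTPTR_RELEASE, FT_ATOMIC_LOAD_UINTPTR_ACQUIRE and _co_instrumentation_version are the names used in the diff below.

/* Writer side: runs with the code object locked (or the world stopped).
 * All instrumentation data is written first; the new version is then
 * published with a release store. */
static void
publish_instrumentation(PyCodeObject *code, uintptr_t new_version)
{
    /* ... update bytecode, tool tables, line data ... */
    FT_ATOMIC_STORE_UINTPTR_RELEASE(code->_co_instrumentation_version,
                                    new_version);
}

/* Reader side: RESUME/RESUME_CHECK do an acquire load that pairs with the
 * release store above, so a thread that observes the current version also
 * observes the instrumentation data it describes; a mismatch forces
 * (re-)instrumentation before execution continues. */
static int
needs_instrumentation(PyCodeObject *code, uintptr_t global_version)
{
    uintptr_t code_version =
        FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
    return code_version != global_version;
}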

18 files changed:
Include/cpython/pyatomic.h
Include/cpython/pyatomic_gcc.h
Include/cpython/pyatomic_msc.h
Include/cpython/pyatomic_std.h
Include/internal/pycore_ceval_state.h
Include/internal/pycore_gc.h
Include/internal/pycore_pyatomic_ft_wrappers.h
Lib/test/test_free_threading/__init__.py [new file with mode: 0644]
Lib/test/test_free_threading/test_monitoring.py [new file with mode: 0644]
Makefile.pre.in
Python/bytecodes.c
Python/ceval.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/instrumentation.c
Python/legacy_tracing.c
Python/pystate.c
Tools/jit/template.c

index c3e132d3877ca51061c14044082820bb738d3bfe..69083f1d9dd0c23211cda4fa11592e17c0647257 100644 (file)
@@ -465,10 +465,16 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj,
 static inline void *
 _Py_atomic_load_ptr_acquire(const void *obj);
 
+static inline uintptr_t
+_Py_atomic_load_uintptr_acquire(const uintptr_t *obj);
+
 // Stores `*obj = value` (release operation)
 static inline void
 _Py_atomic_store_ptr_release(void *obj, void *value);
 
+static inline void
+_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value);
+
 static inline void
 _Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value);
 
@@ -491,6 +497,8 @@ static inline Py_ssize_t
 _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj);
 
 
+
+
 // --- _Py_atomic_fence ------------------------------------------------------
 
 // Sequential consistency fence. C11 fences have complex semantics. When
index 0b40f81bd8736dec150188ec549497ec87e80287..af78a94c736545f677b48865f57259d06ff42548 100644 (file)
@@ -492,10 +492,18 @@ static inline void *
 _Py_atomic_load_ptr_acquire(const void *obj)
 { return (void *)__atomic_load_n((void **)obj, __ATOMIC_ACQUIRE); }
 
+static inline uintptr_t
+_Py_atomic_load_uintptr_acquire(const uintptr_t *obj)
+{ return (uintptr_t)__atomic_load_n((uintptr_t *)obj, __ATOMIC_ACQUIRE); }
+
 static inline void
 _Py_atomic_store_ptr_release(void *obj, void *value)
 { __atomic_store_n((void **)obj, value, __ATOMIC_RELEASE); }
 
+static inline void
+_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value)
+{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); }
+
 static inline void
 _Py_atomic_store_int_release(int *obj, int value)
 { __atomic_store_n(obj, value, __ATOMIC_RELEASE); }
index 3205e253b2854668565aaebd3dfc14f421b82ccd..212cd7817d01c5271b04908c35e044c9377e1284 100644 (file)
@@ -914,6 +914,18 @@ _Py_atomic_load_ptr_acquire(const void *obj)
 #endif
 }
 
+static inline uintptr_t
+_Py_atomic_load_uintptr_acquire(const uintptr_t *obj)
+{
+#if defined(_M_X64) || defined(_M_IX86)
+    return *(uintptr_t volatile *)obj;
+#elif defined(_M_ARM64)
+    return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj);
+#else
+#  error "no implementation of _Py_atomic_load_uintptr_acquire"
+#endif
+}
+
 static inline void
 _Py_atomic_store_ptr_release(void *obj, void *value)
 {
@@ -926,6 +938,19 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
 #endif
 }
 
+static inline void
+_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value)
+{
+#if defined(_M_X64) || defined(_M_IX86)
+    *(uintptr_t volatile *)obj = value;
+#elif defined(_M_ARM64)
+    _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64);
+    __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value);
+#else
+#  error "no implementation of _Py_atomic_store_uintptr_release"
+#endif
+}
+
 static inline void
 _Py_atomic_store_int_release(int *obj, int value)
 {
index ef34bb0b77dfe55ee61fb72441e8361b6329dfd9..6a77eae536d8ddea2ad2b7bc5af5d77fd995e97a 100644 (file)
@@ -863,6 +863,14 @@ _Py_atomic_load_ptr_acquire(const void *obj)
                                 memory_order_acquire);
 }
 
+static inline uintptr_t
+_Py_atomic_load_uintptr_acquire(const uintptr_t *obj)
+{
+    _Py_USING_STD;
+    return atomic_load_explicit((const _Atomic(uintptr_t)*)obj,
+                                memory_order_acquire);
+}
+
 static inline void
 _Py_atomic_store_ptr_release(void *obj, void *value)
 {
@@ -871,6 +879,14 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
                           memory_order_release);
 }
 
+static inline void
+_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value)
+{
+    _Py_USING_STD;
+    atomic_store_explicit((_Atomic(uintptr_t)*)obj, value,
+                          memory_order_release);
+}
+
 static inline void
 _Py_atomic_store_int_release(int *obj, int value)
 {
index b453328f15649eb6dac020869a6c2dd040d03c12..168295534e036cac8395e3b163bd1f37a1f7859f 100644 (file)
@@ -63,6 +63,7 @@ struct _ceval_runtime_state {
     } perf;
     /* Pending calls to be made only on the main thread. */
     struct _pending_calls pending_mainthread;
+    PyMutex sys_trace_profile_mutex;
 };
 
 #ifdef PY_HAVE_PERF_TRAMPOLINE
index 60020b5c01f8a6c51665a9527af022335c58e98d..9e465fdd86279f333f23953ea00aa49158e3ebb5 100644 (file)
@@ -93,7 +93,7 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
  * threads and needs special purpose when freeing due to
  * the possibility of in-flight lock-free reads occurring.
  * Objects with this bit that are GC objects will automatically
- * delay-freed by PyObject_GC_Del.  */
+ * delay-freed by PyObject_GC_Del. */
 static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
     return (op->ob_gc_bits & _PyGC_BITS_SHARED_INLINE) != 0;
 }
index 2514f51f1b00862317d125f4ab1296fb2bc2bfdd..fed5d6e0ec2c54ef6cde320ae74bb44ad9c536a9 100644 (file)
@@ -26,20 +26,34 @@ extern "C" {
     _Py_atomic_load_ssize_relaxed(&value)
 #define FT_ATOMIC_STORE_PTR(value, new_value) \
     _Py_atomic_store_ptr(&value, new_value)
+#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \
+    _Py_atomic_load_ptr_acquire(&value)
+#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) \
+    _Py_atomic_load_uintptr_acquire(&value)
 #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \
     _Py_atomic_store_ptr_relaxed(&value, new_value)
 #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \
     _Py_atomic_store_ptr_release(&value, new_value)
+#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) \
+    _Py_atomic_store_uintptr_release(&value, new_value)
 #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \
     _Py_atomic_store_ssize_relaxed(&value, new_value)
+#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \
+    _Py_atomic_store_uint8_relaxed(&value, new_value)
+
 #else
 #define FT_ATOMIC_LOAD_PTR(value) value
 #define FT_ATOMIC_LOAD_SSIZE(value) value
 #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value
 #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value
+#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value
+#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value
 #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value
+#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value
+#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value
+
 #endif
 
 #ifdef __cplusplus
diff --git a/Lib/test/test_free_threading/__init__.py b/Lib/test/test_free_threading/__init__.py
new file mode 100644 (file)
index 0000000..9a89d27
--- /dev/null
@@ -0,0 +1,7 @@
+import os
+
+from test import support
+
+
+def load_tests(*args):
+    return support.load_package_tests(os.path.dirname(__file__), *args)
diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py
new file mode 100644 (file)
index 0000000..e170840
--- /dev/null
@@ -0,0 +1,232 @@
+"""Tests monitoring, sys.settrace, and sys.setprofile in a multi-threaded
+environment to verify they are thread-safe in a free-threaded build"""
+
+import sys
+import time
+import unittest
+import weakref
+
+from sys import monitoring
+from test.support import is_wasi
+from threading import Thread
+from unittest import TestCase
+
+
+class InstrumentationMultiThreadedMixin:
+    if not hasattr(sys, "gettotalrefcount"):
+        thread_count = 50
+        func_count = 1000
+        fib = 15
+    else:
+        # Run a little faster in debug builds...
+        thread_count = 25
+        func_count = 500
+        fib = 15
+
+    def after_threads(self):
+        """Runs once after all the threads have started"""
+        pass
+
+    def during_threads(self):
+        """Runs repeatedly while the threads are still running"""
+        pass
+
+    def work(self, n, funcs):
+        """Fibonacci function which also calls a bunch of random functions"""
+        for func in funcs:
+            func()
+        if n < 2:
+            return n
+        return self.work(n - 1, funcs) + self.work(n - 2, funcs)
+
+    def start_work(self, n, funcs):
+        # With the GIL builds we need to make sure that the hooks have
+        # a chance to run as it's possible to run w/o releasing the GIL.
+        time.sleep(1)
+        self.work(n, funcs)
+
+    def after_test(self):
+        """Runs once after the test is done"""
+        pass
+
+    def test_instrumentation(self):
+        # Setup a bunch of functions which will need instrumentation...
+        funcs = []
+        for i in range(self.func_count):
+            x = {}
+            exec("def f(): pass", x)
+            funcs.append(x["f"])
+
+        threads = []
+        for i in range(self.thread_count):
+            # Each thread gets a copy of the func list to avoid contention
+            t = Thread(target=self.start_work, args=(self.fib, list(funcs)))
+            t.start()
+            threads.append(t)
+
+        self.after_threads()
+
+        while True:
+            any_alive = False
+            for t in threads:
+                if t.is_alive():
+                    any_alive = True
+                    break
+
+            if not any_alive:
+                break
+
+            self.during_threads()
+
+        self.after_test()
+
+
+class MonitoringTestMixin:
+    def setUp(self):
+        for i in range(6):
+            if monitoring.get_tool(i) is None:
+                self.tool_id = i
+                monitoring.use_tool_id(i, self.__class__.__name__)
+                break
+
+    def tearDown(self):
+        monitoring.free_tool_id(self.tool_id)
+
+
+@unittest.skipIf(is_wasi, "WASI has no threads.")
+class SetPreTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase):
+    """Sets tracing one time after the threads have started"""
+
+    def setUp(self):
+        super().setUp()
+        self.called = False
+
+    def after_test(self):
+        self.assertTrue(self.called)
+
+    def trace_func(self, frame, event, arg):
+        self.called = True
+        return self.trace_func
+
+    def after_threads(self):
+        sys.settrace(self.trace_func)
+
+
+@unittest.skipIf(is_wasi, "WASI has no threads.")
+class MonitoringMultiThreaded(
+    MonitoringTestMixin, InstrumentationMultiThreadedMixin, TestCase
+):
+    """Uses sys.monitoring and repeatedly toggles instrumentation on and off"""
+
+    def setUp(self):
+        super().setUp()
+        self.set = False
+        self.called = False
+        monitoring.register_callback(
+            self.tool_id, monitoring.events.LINE, self.callback
+        )
+
+    def tearDown(self):
+        monitoring.set_events(self.tool_id, 0)
+        super().tearDown()
+
+    def callback(self, *args):
+        self.called = True
+
+    def after_test(self):
+        self.assertTrue(self.called)
+
+    def during_threads(self):
+        if self.set:
+            monitoring.set_events(
+                self.tool_id, monitoring.events.CALL | monitoring.events.LINE
+            )
+        else:
+            monitoring.set_events(self.tool_id, 0)
+        self.set = not self.set
+
+
+@unittest.skipIf(is_wasi, "WASI has no threads.")
+class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase):
+    """Uses sys.settrace and repeatedly toggles instrumentation on and off"""
+
+    def setUp(self):
+        self.set = False
+        self.called = False
+
+    def after_test(self):
+        self.assertTrue(self.called)
+
+    def tearDown(self):
+        sys.settrace(None)
+
+    def trace_func(self, frame, event, arg):
+        self.called = True
+        return self.trace_func
+
+    def during_threads(self):
+        if self.set:
+            sys.settrace(self.trace_func)
+        else:
+            sys.settrace(None)
+        self.set = not self.set
+
+
+@unittest.skipIf(is_wasi, "WASI has no threads.")
+class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase):
+    """Uses sys.setprofile and repeatedly toggles instrumentation on and off"""
+    thread_count = 25
+    func_count = 200
+    fib = 15
+
+    def setUp(self):
+        self.set = False
+        self.called = False
+
+    def after_test(self):
+        self.assertTrue(self.called)
+
+    def tearDown(self):
+        sys.setprofile(None)
+
+    def trace_func(self, frame, event, arg):
+        self.called = True
+        return self.trace_func
+
+    def during_threads(self):
+        if self.set:
+            sys.setprofile(self.trace_func)
+        else:
+            sys.setprofile(None)
+        self.set = not self.set
+
+
+@unittest.skipIf(is_wasi, "WASI has no threads.")
+class MonitoringMisc(MonitoringTestMixin, TestCase):
+    def register_callback(self):
+        def callback(*args):
+            pass
+
+        for i in range(200):
+            monitoring.register_callback(self.tool_id, monitoring.events.LINE, callback)
+
+        self.refs.append(weakref.ref(callback))
+
+    def test_register_callback(self):
+        self.refs = []
+        threads = []
+        for i in range(50):
+            t = Thread(target=self.register_callback)
+            t.start()
+            threads.append(t)
+
+        for thread in threads:
+            thread.join()
+
+        monitoring.register_callback(self.tool_id, monitoring.events.LINE, None)
+        for ref in self.refs:
+            self.assertEqual(ref(), None)
+
+
+if __name__ == "__main__":
+    unittest.main()
index fd8678cdaf820747451606c1937e543b3176e176..f7c21a380caa99f8fc6b73ded31ce4e8c55dc111 100644 (file)
@@ -2366,6 +2366,7 @@ TESTSUBDIRS=      idlelib/idle_test \
                test/test_doctest \
                test/test_email \
                test/test_email/data \
+               test/test_free_threading \
                test/test_future_stmt \
                test/test_gdb \
                test/test_import \
index c34d702f06418e88348754443650298d9afb5d94..c1fbd3c7d26e01ea53e7fbcaa66a742862fc8cbf 100644 (file)
@@ -20,6 +20,7 @@
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include "pycore_opcode_metadata.h"  // uop names
 #include "pycore_opcode_utils.h"  // MAKE_FUNCTION_*
+#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_range.h"         // _PyRangeIterObject
@@ -150,10 +151,11 @@ dummy_func(
             uintptr_t global_version =
                 _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) &
                 ~_PY_EVAL_EVENTS_MASK;
-            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            PyCodeObject *code = _PyFrame_GetCode(frame);
+            uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
             assert((code_version & 255) == 0);
             if (code_version != global_version) {
-                int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
+                int err = _Py_Instrument(code, tstate->interp);
                 ERROR_IF(err, error);
                 next_instr = this_instr;
             }
@@ -171,14 +173,14 @@ dummy_func(
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
 #endif
             uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             assert((version & _PY_EVAL_EVENTS_MASK) == 0);
             DEOPT_IF(eval_breaker != version);
         }
 
         inst(INSTRUMENTED_RESUME, (--)) {
             uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
-            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             if (code_version != global_version) {
                 if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
                     ERROR_NO_POP();
@@ -2377,7 +2379,14 @@ dummy_func(
         };
 
         tier1 inst(ENTER_EXECUTOR, (--)) {
+            int prevoparg = oparg;
             CHECK_EVAL_BREAKER();
+            if (this_instr->op.code != ENTER_EXECUTOR ||
+                this_instr->op.arg != prevoparg) {
+                next_instr = this_instr;
+                DISPATCH();
+            }
+
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
             assert(executor->vm_data.index == INSTR_OFFSET() - 1);
index b88e555ded5c2ee132fc6eef40a545634af57b90..2f217c5f33c6cef1aa182bebe9b277936a8aaff7 100644 (file)
@@ -20,6 +20,7 @@
 #include "pycore_opcode_metadata.h" // EXTRA_CASES
 #include "pycore_optimizer.h"     // _PyUOpExecutor_Type
 #include "pycore_opcode_utils.h"  // MAKE_FUNCTION_*
+#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_range.h"         // _PyRangeIterObject
index fccff24a41858614a27954797713702e47cc9593..df87f9178f17cfc732e0bde7edbe6bcf9d1e2e0f 100644 (file)
@@ -21,7 +21,7 @@
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
             uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             assert((version & _PY_EVAL_EVENTS_MASK) == 0);
             if (eval_breaker != version) {
                 UOP_STAT_INC(uopcode, miss);
index a7764b0ec12e108953d15039bf72912ad82976fa..a426d9e208492ec6ddb349f5a928c658140eb9e6 100644 (file)
         }
 
         TARGET(ENTER_EXECUTOR) {
-            frame->instr_ptr = next_instr;
+            _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
+            (void)this_instr;
             next_instr += 1;
             INSTRUCTION_STATS(ENTER_EXECUTOR);
+            int prevoparg = oparg;
             CHECK_EVAL_BREAKER();
+            if (this_instr->op.code != ENTER_EXECUTOR ||
+                this_instr->op.arg != prevoparg) {
+                next_instr = this_instr;
+                DISPATCH();
+            }
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
             assert(executor->vm_data.index == INSTR_OFFSET() - 1);
             next_instr += 1;
             INSTRUCTION_STATS(INSTRUMENTED_RESUME);
             uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
-            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             if (code_version != global_version) {
                 if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
                     goto error;
             uintptr_t global_version =
             _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) &
             ~_PY_EVAL_EVENTS_MASK;
-            uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            PyCodeObject *code = _PyFrame_GetCode(frame);
+            uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
             assert((code_version & 255) == 0);
             if (code_version != global_version) {
-                int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
+                int err = _Py_Instrument(code, tstate->interp);
                 if (err) goto error;
                 next_instr = this_instr;
             }
             _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
             #endif
             uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
-            uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
+            uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
             assert((version & _PY_EVAL_EVENTS_MASK) == 0);
             DEOPT_IF(eval_breaker != version, RESUME);
             DISPATCH();
index 3866144a19bf7475a186033f0a0fd135b7ceedcf..71efeff077633d9e10370c980d3adc07e655b69c 100644 (file)
@@ -6,6 +6,7 @@
 #include "pycore_call.h"
 #include "pycore_ceval.h"         // _PY_EVAL_EVENTS_BITS
 #include "pycore_code.h"          // _PyCode_Clear_Executors()
+#include "pycore_critical_section.h"
 #include "pycore_frame.h"
 #include "pycore_interp.h"
 #include "pycore_long.h"
 #include "pycore_namespace.h"
 #include "pycore_object.h"
 #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches
+#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINTPTR_RELEASE
 #include "pycore_pyerrors.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 
 /* Uncomment this to dump debugging output when assertions fail */
 // #define INSTRUMENT_DEBUG 1
 
+#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED)
+
+#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj)                         \
+    if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) {   \
+        _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);             \
+    }
+#define ASSERT_WORLD_STOPPED() assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
+
+#else
+
+#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj)
+#define ASSERT_WORLD_STOPPED()
+
+#endif
+
+#ifdef Py_GIL_DISABLED
+
+#define LOCK_CODE(code)                                             \
+    assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \
+    Py_BEGIN_CRITICAL_SECTION(code)
+
+#define UNLOCK_CODE()   Py_END_CRITICAL_SECTION()
+
+#else
+
+#define LOCK_CODE(code)
+#define UNLOCK_CODE()
+
+#endif
+
 PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type);
 
 PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type);
@@ -278,6 +310,8 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta)
 int
 _PyInstruction_GetLength(PyCodeObject *code, int offset)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     int opcode = _PyCode_CODE(code)[offset].op.code;
     assert(opcode != 0);
     assert(opcode != RESERVED);
@@ -424,6 +458,7 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out)
 }
 
 #define CHECK(test) do { \
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code); \
     if (!(test)) { \
         dump_instrumentation_data(code, i, stderr); \
     } \
@@ -449,6 +484,8 @@ valid_opcode(int opcode)
 static void
 sanity_check_instrumentation(PyCodeObject *code)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     _PyCoMonitoringData *data = code->_co_monitoring;
     if (data == NULL) {
         return;
@@ -718,6 +755,7 @@ instrument_per_instruction(PyCodeObject *code, int i)
 static void
 remove_tools(PyCodeObject * code, int offset, int event, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
     assert(event != PY_MONITORING_EVENT_LINE);
     assert(event != PY_MONITORING_EVENT_INSTRUCTION);
     assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event));
@@ -752,6 +790,8 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools)
 static void
 remove_line_tools(PyCodeObject * code, int offset, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     assert(code->_co_monitoring);
     if (code->_co_monitoring->line_tools)
     {
@@ -774,6 +814,7 @@ remove_line_tools(PyCodeObject * code, int offset, int tools)
 static void
 add_tools(PyCodeObject * code, int offset, int event, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
     assert(event != PY_MONITORING_EVENT_LINE);
     assert(event != PY_MONITORING_EVENT_INSTRUCTION);
     assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event));
@@ -794,6 +835,8 @@ add_tools(PyCodeObject * code, int offset, int event, int tools)
 static void
 add_line_tools(PyCodeObject * code, int offset, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_LINE, tools));
     assert(code->_co_monitoring);
     if (code->_co_monitoring->line_tools) {
@@ -810,6 +853,8 @@ add_line_tools(PyCodeObject * code, int offset, int tools)
 static void
 add_per_instruction_tools(PyCodeObject * code, int offset, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_INSTRUCTION, tools));
     assert(code->_co_monitoring);
     if (code->_co_monitoring->per_instruction_tools) {
@@ -826,6 +871,8 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools)
 static void
 remove_per_instruction_tools(PyCodeObject * code, int offset, int tools)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     assert(code->_co_monitoring);
     if (code->_co_monitoring->per_instruction_tools) {
         uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset];
@@ -1056,7 +1103,9 @@ call_instrumentation_vector(
                 break;
             }
             else {
+                LOCK_CODE(code);
                 remove_tools(code, offset, event, 1 << tool);
+                UNLOCK_CODE();
             }
         }
     }
@@ -1189,6 +1238,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
             goto done;
         }
     }
+
     uint8_t tools = code->_co_monitoring->line_tools != NULL ?
         code->_co_monitoring->line_tools[i] :
         (interp->monitors.tools[PY_MONITORING_EVENT_LINE] |
@@ -1249,7 +1299,9 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
         }
         else {
             /* DISABLE  */
+            LOCK_CODE(code);
             remove_line_tools(code, i, 1 << tool);
+            UNLOCK_CODE();
         }
     } while (tools);
     Py_DECREF(line_obj);
@@ -1305,7 +1357,9 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame*
         }
         else {
             /* DISABLE  */
+            LOCK_CODE(code);
             remove_per_instruction_tools(code, offset, 1 << tool);
+            UNLOCK_CODE();
         }
     }
     Py_DECREF(offset_obj);
@@ -1320,15 +1374,18 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj)
     PyInterpreterState *is = _PyInterpreterState_GET();
     assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS);
     assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS);
-    PyObject *callback = is->monitoring_callables[tool_id][event_id];
-    is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj);
+    PyObject *callback = _Py_atomic_exchange_ptr(&is->monitoring_callables[tool_id][event_id],
+                                                 Py_XNewRef(obj));
+
     return callback;
 }
 
 static void
 initialize_tools(PyCodeObject *code)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
     uint8_t* tools = code->_co_monitoring->tools;
+
     assert(tools != NULL);
     int code_len = (int)Py_SIZE(code);
     for (int i = 0; i < code_len; i++) {
@@ -1384,7 +1441,9 @@ initialize_tools(PyCodeObject *code)
 static void
 initialize_lines(PyCodeObject *code)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
     _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines;
+
     assert(line_data != NULL);
     int code_len = (int)Py_SIZE(code);
     PyCodeAddressRange range;
@@ -1501,7 +1560,9 @@ initialize_lines(PyCodeObject *code)
 static void
 initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
     uint8_t *line_tools = code->_co_monitoring->line_tools;
+
     assert(line_tools != NULL);
     int code_len = (int)Py_SIZE(code);
     for (int i = 0; i < code_len; i++) {
@@ -1512,6 +1573,7 @@ initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events)
 static int
 allocate_instrumentation_data(PyCodeObject *code)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
 
     if (code->_co_monitoring == NULL) {
         code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData));
@@ -1533,6 +1595,8 @@ allocate_instrumentation_data(PyCodeObject *code)
 static int
 update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     int code_len = (int)Py_SIZE(code);
     if (allocate_instrumentation_data(code)) {
         return -1;
@@ -1594,9 +1658,11 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp)
     return 0;
 }
 
-int
-_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
+static int
+instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
 {
+    ASSERT_WORLD_STOPPED_OR_LOCKED(code);
+
     if (is_version_up_to_date(code, interp)) {
         assert(
             interp->ceval.instrumentation_version == 0 ||
@@ -1636,12 +1702,8 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
         assert(monitors_are_empty(monitors_and(new_events, removed_events)));
     }
     code->_co_monitoring->active_monitors = active_events;
-    code->_co_instrumentation_version = global_version(interp);
     if (monitors_are_empty(new_events) && monitors_are_empty(removed_events)) {
-#ifdef INSTRUMENT_DEBUG
-        sanity_check_instrumentation(code);
-#endif
-        return 0;
+        goto done;
     }
     /* Insert instrumentation */
     for (int i = code->_co_firsttraceable; i < code_len; i+= _PyInstruction_GetLength(code, i)) {
@@ -1730,12 +1792,26 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
             i += _PyInstruction_GetLength(code, i);
         }
     }
+done:
+    FT_ATOMIC_STORE_UINTPTR_RELEASE(code->_co_instrumentation_version,
+                                    global_version(interp));
+
 #ifdef INSTRUMENT_DEBUG
     sanity_check_instrumentation(code);
 #endif
     return 0;
 }
 
+int
+_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
+{
+    int res;
+    LOCK_CODE(code);
+    res = instrument_lock_held(code, interp);
+    UNLOCK_CODE();
+    return res;
+}
+
 #define C_RETURN_EVENTS \
     ((1 << PY_MONITORING_EVENT_C_RETURN) | \
      (1 << PY_MONITORING_EVENT_C_RAISE))
@@ -1746,6 +1822,8 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
 
 static int
 instrument_all_executing_code_objects(PyInterpreterState *interp) {
+    ASSERT_WORLD_STOPPED();
+
     _PyRuntimeState *runtime = &_PyRuntime;
     HEAD_LOCK(runtime);
     PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
@@ -1754,7 +1832,7 @@ instrument_all_executing_code_objects(PyInterpreterState *interp) {
         _PyInterpreterFrame *frame = ts->current_frame;
         while (frame) {
             if (frame->owner != FRAME_OWNED_BY_CSTACK) {
-                if (_Py_Instrument(_PyFrame_GetCode(frame), interp)) {
+                if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) {
                     return -1;
                 }
             }
@@ -1817,19 +1895,27 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
     if (check_tool(interp, tool_id)) {
         return -1;
     }
+
+    int res;
+    _PyEval_StopTheWorld(interp);
     uint32_t existing_events = get_events(&interp->monitors, tool_id);
     if (existing_events == events) {
-        return 0;
+        res = 0;
+        goto done;
     }
     set_events(&interp->monitors, tool_id, events);
     uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
     if (new_version == 0) {
         PyErr_Format(PyExc_OverflowError, "events set too many times");
-        return -1;
+        res = -1;
+        goto done;
     }
     set_global_version(tstate, new_version);
     _Py_Executors_InvalidateAll(interp, 1);
-    return instrument_all_executing_code_objects(interp);
+    res = instrument_all_executing_code_objects(interp);
+done:
+    _PyEval_StartTheWorld(interp);
+    return res;
 }
 
 int
@@ -1845,24 +1931,33 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent
     if (check_tool(interp, tool_id)) {
         return -1;
     }
+
+    int res;
+    LOCK_CODE(code);
     if (allocate_instrumentation_data(code)) {
-        return -1;
+        res = -1;
+        goto done;
     }
+
     _Py_LocalMonitors *local = &code->_co_monitoring->local_monitors;
     uint32_t existing_events = get_local_events(local, tool_id);
     if (existing_events == events) {
-        return 0;
+        res = 0;
+        goto done;
     }
     set_local_events(local, tool_id, events);
     if (is_version_up_to_date(code, interp)) {
         /* Force instrumentation update */
         code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT;
     }
+
     _Py_Executors_InvalidateDependency(interp, code, 1);
-    if (_Py_Instrument(code, interp)) {
-        return -1;
-    }
-    return 0;
+
+    res = instrument_lock_held(code, interp);
+
+done:
+    UNLOCK_CODE();
+    return res;
 }
 
 int
@@ -2158,15 +2253,21 @@ monitoring_restart_events_impl(PyObject *module)
      */
     PyThreadState *tstate = _PyThreadState_GET();
     PyInterpreterState *interp = tstate->interp;
+
+    _PyEval_StopTheWorld(interp);
     uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
     uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT;
     if (new_version <= MONITORING_VERSION_INCREMENT) {
+        _PyEval_StartTheWorld(interp);
         PyErr_Format(PyExc_OverflowError, "events set too many times");
         return NULL;
     }
     interp->last_restart_version = restart_version;
     set_global_version(tstate, new_version);
-    if (instrument_all_executing_code_objects(interp)) {
+    int res = instrument_all_executing_code_objects(interp);
+    _PyEval_StartTheWorld(interp);
+
+    if (res) {
         return NULL;
     }
     Py_RETURN_NONE;
index ccbb3eb3f7c82a2502c0743c072e3e16f8b39908..d7aae7d2343ac2a82d2b1e044be92e8184193592 100644 (file)
@@ -16,6 +16,13 @@ typedef struct _PyLegacyEventHandler {
     int event;
 } _PyLegacyEventHandler;
 
+#ifdef Py_GIL_DISABLED
+#define LOCK_SETUP()    PyMutex_Lock(&_PyRuntime.ceval.sys_trace_profile_mutex);
+#define UNLOCK_SETUP()  PyMutex_Unlock(&_PyRuntime.ceval.sys_trace_profile_mutex);
+#else
+#define LOCK_SETUP()
+#define UNLOCK_SETUP()
+#endif
 /* The Py_tracefunc function expects the following arguments:
  *   obj: the trace object (PyObject *)
  *   frame: the current frame (PyFrameObject *)
@@ -414,19 +421,10 @@ is_tstate_valid(PyThreadState *tstate)
 }
 #endif
 
-int
-_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
+static Py_ssize_t
+setup_profile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_profileobj)
 {
-    assert(is_tstate_valid(tstate));
-    /* The caller must hold the GIL */
-    assert(PyGILState_Check());
-
-    /* Call _PySys_Audit() in the context of the current thread state,
-       even if tstate is not the current thread state. */
-    PyThreadState *current_tstate = _PyThreadState_GET();
-    if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) {
-        return -1;
-    }
+    *old_profileobj = NULL;
     /* Setup PEP 669 monitoring callbacks and events. */
     if (!tstate->interp->sys_profile_initialized) {
         tstate->interp->sys_profile_initialized = true;
@@ -469,25 +467,15 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
 
     int delta = (func != NULL) - (tstate->c_profilefunc != NULL);
     tstate->c_profilefunc = func;
-    PyObject *old_profileobj = tstate->c_profileobj;
+    *old_profileobj = tstate->c_profileobj;
     tstate->c_profileobj = Py_XNewRef(arg);
-    Py_XDECREF(old_profileobj);
     tstate->interp->sys_profiling_threads += delta;
     assert(tstate->interp->sys_profiling_threads >= 0);
-
-    uint32_t events = 0;
-    if (tstate->interp->sys_profiling_threads) {
-        events =
-            (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) |
-            (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) |
-            (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) |
-            (1 << PY_MONITORING_EVENT_PY_THROW);
-    }
-    return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events);
+    return tstate->interp->sys_profiling_threads;
 }
 
 int
-_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
+_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
 {
     assert(is_tstate_valid(tstate));
     /* The caller must hold the GIL */
@@ -496,11 +484,32 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
     /* Call _PySys_Audit() in the context of the current thread state,
        even if tstate is not the current thread state. */
     PyThreadState *current_tstate = _PyThreadState_GET();
-    if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) {
+    if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) {
         return -1;
     }
 
-    assert(tstate->interp->sys_tracing_threads >= 0);
+    // needs to be decref'd outside of the lock
+    PyObject *old_profileobj;
+    LOCK_SETUP();
+    Py_ssize_t profiling_threads = setup_profile(tstate, func, arg, &old_profileobj);
+    UNLOCK_SETUP();
+    Py_XDECREF(old_profileobj);
+
+    uint32_t events = 0;
+    if (profiling_threads) {
+        events =
+            (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) |
+            (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) |
+            (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) |
+            (1 << PY_MONITORING_EVENT_PY_THROW);
+    }
+    return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events);
+}
+
+static Py_ssize_t
+setup_tracing(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_traceobj)
+{
+    *old_traceobj = NULL;
     /* Setup PEP 669 monitoring callbacks and events. */
     if (!tstate->interp->sys_trace_initialized) {
         tstate->interp->sys_trace_initialized = true;
@@ -553,14 +562,39 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
 
     int delta = (func != NULL) - (tstate->c_tracefunc != NULL);
     tstate->c_tracefunc = func;
-    PyObject *old_traceobj = tstate->c_traceobj;
+    *old_traceobj = tstate->c_traceobj;
     tstate->c_traceobj = Py_XNewRef(arg);
-    Py_XDECREF(old_traceobj);
     tstate->interp->sys_tracing_threads += delta;
     assert(tstate->interp->sys_tracing_threads >= 0);
+    return tstate->interp->sys_tracing_threads;
+}
+
+int
+_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
+{
+    assert(is_tstate_valid(tstate));
+    /* The caller must hold the GIL */
+    assert(PyGILState_Check());
+
+    /* Call _PySys_Audit() in the context of the current thread state,
+       even if tstate is not the current thread state. */
+    PyThreadState *current_tstate = _PyThreadState_GET();
+    if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) {
+        return -1;
+    }
+    assert(tstate->interp->sys_tracing_threads >= 0);
+    // needs to be decref'd outside of the lock
+    PyObject *old_traceobj;
+    LOCK_SETUP();
+    Py_ssize_t tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj);
+    UNLOCK_SETUP();
+    Py_XDECREF(old_traceobj);
+    if (tracing_threads < 0) {
+        return -1;
+    }
 
     uint32_t events = 0;
-    if (tstate->interp->sys_tracing_threads) {
+    if (tracing_threads) {
         events =
             (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) |
             (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) |
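
A note on the ordering in the legacy_tracing.c hunk above: the previous trace/profile object is only decref'd after UNLOCK_SETUP(). A minimal sketch of the hazard this avoids (assuming, as elsewhere in CPython, that PyMutex is not re-entrant):

/* Dropping the reference while sys_trace_profile_mutex is still held would
 * let the old object's destructor run arbitrary Python code, which could
 * call sys.settrace()/sys.setprofile() again and block on the same mutex. */
PyObject *old_traceobj;
LOCK_SETUP();
Py_ssize_t tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj);
UNLOCK_SETUP();
Py_XDECREF(old_traceobj);   /* safe: the lock has been released */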
index 37480df88aeb7264eb4336210871e1bd9305e436..06806bd75fbcb27fd2b46d70020515b48de63d8a 100644 (file)
@@ -399,6 +399,7 @@ _Py_COMP_DIAG_POP
         &(runtime)->unicode_state.ids.mutex, \
         &(runtime)->imports.extensions.mutex, \
         &(runtime)->ceval.pending_mainthread.mutex, \
+        &(runtime)->ceval.sys_trace_profile_mutex, \
         &(runtime)->atexit.mutex, \
         &(runtime)->audit_hooks.mutex, \
         &(runtime)->allocators.mutex, \
index b195aff377b3b5a05a5682442be1629093ea2b37..228dc83254d678cf77aa0813997c1f1c6c0fc5ed 100644 (file)
@@ -12,6 +12,7 @@
 #include "pycore_opcode_metadata.h"
 #include "pycore_opcode_utils.h"
 #include "pycore_optimizer.h"
+#include "pycore_pyatomic_ft_wrappers.h"
 #include "pycore_range.h"
 #include "pycore_setobject.h"
 #include "pycore_sliceobject.h"