]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-91048: Fix external inspection multi-threaded performance (#136005)
authorPablo Galindo Salgado <Pablogsal@gmail.com>
Sat, 28 Jun 2025 13:11:31 +0000 (14:11 +0100)
committerGitHub <noreply@github.com>
Sat, 28 Jun 2025 13:11:31 +0000 (14:11 +0100)
Include/internal/pycore_global_objects_fini_generated.h
Include/internal/pycore_global_strings.h
Include/internal/pycore_runtime_init_generated.h
Include/internal/pycore_unicodeobject_generated.h
Lib/test/test_external_inspection.py
Modules/_remote_debugging_module.c
Modules/clinic/_remote_debugging_module.c.h
Python/remote_debug.h
Tools/inspection/benchmark_external_inspection.py

index e118b86db5075426a263b728ae62978e3c5a62aa..c461bc1786ddf4b6fd2b458b9e63b701b19ef4f1 100644 (file)
@@ -1136,6 +1136,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset_src));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(on_type_read));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(onceregistry));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_active_thread));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_keys));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(oparg));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcode));
index 36f3d23d095d59f57fec53c2dbf37004f5ed44f5..72c2051bd97660807f4724baa1e7fbf6f45d6282 100644 (file)
@@ -627,6 +627,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(offset_src)
         STRUCT_FOR_ID(on_type_read)
         STRUCT_FOR_ID(onceregistry)
+        STRUCT_FOR_ID(only_active_thread)
         STRUCT_FOR_ID(only_keys)
         STRUCT_FOR_ID(oparg)
         STRUCT_FOR_ID(opcode)
index d172cc1485d426852850fb357215620171109cbc..d378fcae26cf351e1b4c89faed5243272e666a32 100644 (file)
@@ -1134,6 +1134,7 @@ extern "C" {
     INIT_ID(offset_src), \
     INIT_ID(on_type_read), \
     INIT_ID(onceregistry), \
+    INIT_ID(only_active_thread), \
     INIT_ID(only_keys), \
     INIT_ID(oparg), \
     INIT_ID(opcode), \
index 0a9be4e41ace892efa0887bdfef58e9ee7a0bc85..e516211f6c6cbc62c5ce84e6d58848fe762c4859 100644 (file)
@@ -2296,6 +2296,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(only_active_thread);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(only_keys);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
index 90214e814f2b35efd0301b5f2f685b3c461159d9..0f31c225e68de37e837d853b06886b0cd0479c9e 100644 (file)
@@ -7,7 +7,7 @@ import socket
 import threading
 from asyncio import staggered, taskgroups, base_events, tasks
 from unittest.mock import ANY
-from test.support import os_helper, SHORT_TIMEOUT, busy_retry
+from test.support import os_helper, SHORT_TIMEOUT, busy_retry, requires_gil_enabled
 from test.support.script_helper import make_script
 from test.support.socket_helper import find_unused_port
 
@@ -876,6 +876,126 @@ class TestGetStackTrace(unittest.TestCase):
             ],
         )
 
+    @skip_if_not_supported
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
+    @requires_gil_enabled("Free threaded builds don't have an 'active thread'")
+    def test_only_active_thread(self):
+        # only_active_thread=True must return one trace: the GIL holder's
+        port = find_unused_port()
+        script = textwrap.dedent(
+            f"""\
+            import time, sys, socket, threading
+
+            # Connect to the test process
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.connect(('localhost', {port}))
+
+            def worker_thread(name, barrier, ready_event):
+                barrier.wait()  # Synchronize thread start
+                ready_event.wait()  # Wait for main thread signal
+                # Sleep to keep thread alive
+                time.sleep(10_000)
+
+            def main_work():
+                # Do busy work to hold the GIL
+                sock.sendall(b"working\\n")
+                count = 0
+                while count < 100000000:
+                    count += 1
+                    if count % 10000000 == 0:
+                        pass  # Keep main thread busy
+                sock.sendall(b"done\\n")
+
+            # Create synchronization primitives
+            num_threads = 3
+            barrier = threading.Barrier(num_threads + 1)  # +1 for main thread
+            ready_event = threading.Event()
+
+            # Start worker threads
+            threads = []
+            for i in range(num_threads):
+                t = threading.Thread(target=worker_thread, args=(f"Worker-{{i}}", barrier, ready_event))
+                t.start()
+                threads.append(t)
+
+            # Wait for all threads to be ready
+            barrier.wait()
+
+            # Signal ready to parent process
+            sock.sendall(b"ready\\n")
+
+            # Signal threads to start waiting
+            ready_event.set()
+
+            # Give threads time to start sleeping
+            time.sleep(0.1)
+
+            # Now do busy work to hold the GIL
+            main_work()
+            """
+        )
+
+        with os_helper.temp_dir() as work_dir:
+            script_dir = os.path.join(work_dir, "script_pkg")
+            os.mkdir(script_dir)
+
+            # Create a socket server to communicate with the target process
+            server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            server_socket.bind(("localhost", port))
+            server_socket.settimeout(SHORT_TIMEOUT)
+            server_socket.listen(1)
+
+            script_name = _make_test_script(script_dir, "script", script)
+            client_socket = None
+            try:
+                p = subprocess.Popen([sys.executable, script_name])
+                client_socket, _ = server_socket.accept()
+                server_socket.close()
+
+                # Wait for ready signal
+                response = b""
+                while b"ready" not in response:
+                    response += client_socket.recv(1024)
+
+                # Wait for the main thread to start its busy work
+                while b"working" not in response:
+                    response += client_socket.recv(1024)
+
+                # Get stack trace with all threads
+                unwinder_all = RemoteUnwinder(p.pid, all_threads=True)
+                all_traces = unwinder_all.get_stack_trace()
+
+                # Get stack trace with only GIL holder
+                unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True)
+                gil_traces = unwinder_gil.get_stack_trace()
+
+            except PermissionError:
+                self.skipTest(
+                    "Insufficient permissions to read the stack trace"
+                )
+            finally:
+                if client_socket is not None:
+                    client_socket.close()
+                p.kill()
+                p.terminate()
+                p.wait(timeout=SHORT_TIMEOUT)
+
+            # Verify we got multiple threads in all_traces
+            self.assertGreater(len(all_traces), 1, "Should have multiple threads")
+
+            # Verify we got exactly one thread in gil_traces
+            self.assertEqual(len(gil_traces), 1, "Should have exactly one GIL holder")
+
+            # The GIL holder should be in the all_traces list
+            gil_thread_id = gil_traces[0][0]
+            all_thread_ids = [trace[0] for trace in all_traces]
+            self.assertIn(gil_thread_id, all_thread_ids,
+                         "GIL holder should be among all threads")
+
 
 if __name__ == "__main__":
     unittest.main()
index c2421cac6bdb173f7049a9205935a7d2334fdbec..ce7189637c2d691e2db4b702df5f91af1821af3d 100644 (file)
 #endif
 
 #ifdef Py_GIL_DISABLED
-#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
-                                      offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
-                                  offsetof(PyInterpreterState, threads.head) + sizeof(void*))
+#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+                                          offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
+                                      offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
+                                  offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
 #else
-#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
-                                  offsetof(PyInterpreterState, threads.head) + sizeof(void*))
+#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+                                      offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
+                                  offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
 #endif
 #define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
 
@@ -206,6 +208,7 @@ typedef struct {
     uint64_t code_object_generation;
     _Py_hashtable_t *code_object_cache;
     int debug;
+    int only_active_thread;
     RemoteDebuggingState *cached_state;  // Cached module state
 #ifdef Py_GIL_DISABLED
     // TLBC cache invalidation tracking
@@ -2496,6 +2499,7 @@ _remote_debugging.RemoteUnwinder.__init__
     pid: int
     *
     all_threads: bool = False
+    only_active_thread: bool = False
     debug: bool = False
 
 Initialize a new RemoteUnwinder object for debugging a remote Python process.
@@ -2504,6 +2508,8 @@ Args:
     pid: Process ID of the target Python process to debug
     all_threads: If True, initialize state for all threads in the process.
                 If False, only initialize for the main thread.
+    only_active_thread: If True, only sample the thread holding the GIL.
+                       Cannot be used together with all_threads=True.
     debug: If True, chain exceptions to explain the sequence of events that
            lead to the exception.
 
@@ -2514,15 +2520,33 @@ Raises:
     PermissionError: If access to the target process is denied
     OSError: If unable to attach to the target process or access its memory
     RuntimeError: If unable to read debug information from the target process
+    ValueError: If both all_threads and only_active_thread are True
 [clinic start generated code]*/
 
 static int
 _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
                                                int pid, int all_threads,
+                                               int only_active_thread,
                                                int debug)
-/*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/
+/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/
 {
+    // Validate that all_threads and only_active_thread are not both True
+    if (all_threads && only_active_thread) {
+        PyErr_SetString(PyExc_ValueError,
+                       "all_threads and only_active_thread cannot both be True");
+        return -1;
+    }
+
+#ifdef Py_GIL_DISABLED
+    if (only_active_thread) {
+        PyErr_SetString(PyExc_ValueError,
+                       "only_active_thread is not supported in free-threaded builds");
+        return -1;
+    }
+#endif
+
     self->debug = debug;
+    self->only_active_thread = only_active_thread;
     self->cached_state = NULL;
     if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
         set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
@@ -2602,13 +2626,18 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
 @critical_section
 _remote_debugging.RemoteUnwinder.get_stack_trace
 
-Returns a list of stack traces for all threads in the target process.
+Returns a list of stack traces for threads in the target process.
 
 Each element in the returned list is a tuple of (thread_id, frame_list), where:
 - thread_id is the OS thread identifier
 - frame_list is a list of tuples (function_name, filename, line_number) representing
   the Python stack frames for that thread, ordered from most recent to oldest
 
+The threads returned depend on the initialization parameters:
+- If only_active_thread was True: returns only the thread holding the GIL
+- If all_threads was True: returns all threads
+- Otherwise: returns only the main thread
+
 Example:
     [
         (1234, [
@@ -2632,7 +2661,7 @@ Raises:
 
 static PyObject *
 _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self)
-/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/
+/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/
 {
     PyObject* result = NULL;
     // Read interpreter state into opaque buffer
@@ -2655,6 +2684,28 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
         _Py_hashtable_clear(self->code_object_cache);
     }
 
+    // If only_active_thread is true, we need to determine which thread holds the GIL
+    PyThreadState* gil_holder = NULL;
+    if (self->only_active_thread) {
+        // The GIL state is already in interp_state_buffer, just read from there
+        // Check if GIL is locked
+        int gil_locked = GET_MEMBER(int, interp_state_buffer,
+            self->debug_offsets.interpreter_state.gil_runtime_state_locked);
+
+        if (gil_locked) {
+            // Get the last holder (current holder when GIL is locked)
+            gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer,
+                self->debug_offsets.interpreter_state.gil_runtime_state_holder);
+        } else {
+            // GIL is not locked, return empty list
+            result = PyList_New(0);
+            if (!result) {
+                set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list");
+            }
+            goto exit;
+        }
+    }
+
 #ifdef Py_GIL_DISABLED
     // Check TLBC generation and invalidate cache if needed
     uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
@@ -2666,7 +2717,10 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
 #endif
 
     uintptr_t current_tstate;
-    if (self->tstate_addr == 0) {
+    if (self->only_active_thread && gil_holder != NULL) {
+        // We have the GIL holder, process only that thread
+        current_tstate = (uintptr_t)gil_holder;
+    } else if (self->tstate_addr == 0) {
         // Get threads head from buffer
         current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
                 self->debug_offsets.interpreter_state.threads_head);
@@ -2700,10 +2754,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
         if (self->tstate_addr) {
             break;
         }
+
+        // If we're only processing the GIL holder, we're done after one iteration
+        if (self->only_active_thread && gil_holder != NULL) {
+            break;
+        }
     }
 
 exit:
-   _Py_RemoteDebug_ClearCache(&self->handle);
     return result;
 }
 
@@ -2827,11 +2885,9 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s
         goto result_err;
     }
 
-    _Py_RemoteDebug_ClearCache(&self->handle);
     return result;
 
 result_err:
-    _Py_RemoteDebug_ClearCache(&self->handle);
     Py_XDECREF(result);
     return NULL;
 }
@@ -2898,11 +2954,9 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject
         goto cleanup;
     }
 
-    _Py_RemoteDebug_ClearCache(&self->handle);
     return result;
 
 cleanup:
-    _Py_RemoteDebug_ClearCache(&self->handle);
     Py_XDECREF(result);
     return NULL;
 }
@@ -2928,7 +2982,6 @@ RemoteUnwinder_dealloc(PyObject *op)
     }
 #endif
     if (self->handle.pid != 0) {
-        _Py_RemoteDebug_ClearCache(&self->handle);
         _Py_RemoteDebug_CleanupProcHandle(&self->handle);
     }
     PyObject_Del(self);
index 5c313a2d66404aff91af45a6093c960b3205f5c8..e80b24b54c0ffacde0ce116f534b57b0c38ee6b9 100644 (file)
@@ -10,7 +10,8 @@ preserve
 #include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
 
 PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
-"RemoteUnwinder(pid, *, all_threads=False, debug=False)\n"
+"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
+"               debug=False)\n"
 "--\n"
 "\n"
 "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
@@ -19,6 +20,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
 "    pid: Process ID of the target Python process to debug\n"
 "    all_threads: If True, initialize state for all threads in the process.\n"
 "                If False, only initialize for the main thread.\n"
+"    only_active_thread: If True, only sample the thread holding the GIL.\n"
+"                       Cannot be used together with all_threads=True.\n"
 "    debug: If True, chain exceptions to explain the sequence of events that\n"
 "           lead to the exception.\n"
 "\n"
@@ -28,11 +31,13 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
 "Raises:\n"
 "    PermissionError: If access to the target process is denied\n"
 "    OSError: If unable to attach to the target process or access its memory\n"
-"    RuntimeError: If unable to read debug information from the target process");
+"    RuntimeError: If unable to read debug information from the target process\n"
+"    ValueError: If both all_threads and only_active_thread are True");
 
 static int
 _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
                                                int pid, int all_threads,
+                                               int only_active_thread,
                                                int debug);
 
 static int
@@ -41,7 +46,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
     int return_value = -1;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
 
-    #define NUM_KEYWORDS 3
+    #define NUM_KEYWORDS 4
     static struct {
         PyGC_Head _this_is_not_used;
         PyObject_VAR_HEAD
@@ -50,7 +55,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
     } _kwtuple = {
         .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
         .ob_hash = -1,
-        .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(debug), },
+        .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(debug), },
     };
     #undef NUM_KEYWORDS
     #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -59,19 +64,20 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
     #  define KWTUPLE NULL
     #endif  // !Py_BUILD_CORE
 
-    static const char * const _keywords[] = {"pid", "all_threads", "debug", NULL};
+    static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "debug", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
         .fname = "RemoteUnwinder",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    PyObject *argsbuf[3];
+    PyObject *argsbuf[4];
     PyObject * const *fastargs;
     Py_ssize_t nargs = PyTuple_GET_SIZE(args);
     Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
     int pid;
     int all_threads = 0;
+    int only_active_thread = 0;
     int debug = 0;
 
     fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
@@ -95,12 +101,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
             goto skip_optional_kwonly;
         }
     }
-    debug = PyObject_IsTrue(fastargs[2]);
+    if (fastargs[2]) {
+        only_active_thread = PyObject_IsTrue(fastargs[2]);
+        if (only_active_thread < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    debug = PyObject_IsTrue(fastargs[3]);
     if (debug < 0) {
         goto exit;
     }
 skip_optional_kwonly:
-    return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, debug);
+    return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, debug);
 
 exit:
     return return_value;
@@ -110,13 +125,18 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__,
 "get_stack_trace($self, /)\n"
 "--\n"
 "\n"
-"Returns a list of stack traces for all threads in the target process.\n"
+"Returns a list of stack traces for threads in the target process.\n"
 "\n"
 "Each element in the returned list is a tuple of (thread_id, frame_list), where:\n"
 "- thread_id is the OS thread identifier\n"
 "- frame_list is a list of tuples (function_name, filename, line_number) representing\n"
 "  the Python stack frames for that thread, ordered from most recent to oldest\n"
 "\n"
+"The threads returned depend on the initialization parameters:\n"
+"- If only_active_thread was True: returns only the thread holding the GIL\n"
+"- If all_threads was True: returns all threads\n"
+"- Otherwise: returns only the main thread\n"
+"\n"
 "Example:\n"
 "    [\n"
 "        (1234, [\n"
@@ -253,4 +273,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject
 
     return return_value;
 }
-/*[clinic end generated code: output=774ec34aa653402d input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a37ab223d5081b16 input=a9049054013a1b77]*/
index 8f9b6cd4c4960f744258e44b2f92a461f408a1b8..d1fcb478d2b035b2c3650f5c39aac85ea7062f89 100644 (file)
@@ -110,14 +110,6 @@ get_page_size(void) {
     return page_size;
 }
 
-typedef struct page_cache_entry {
-    uintptr_t page_addr; // page-aligned base address
-    char *data;
-    int valid;
-    struct page_cache_entry *next;
-} page_cache_entry_t;
-
-#define MAX_PAGES 1024
 
 // Define a platform-independent process handle structure
 typedef struct {
@@ -129,27 +121,9 @@ typedef struct {
 #elif defined(__linux__)
     int memfd;
 #endif
-    page_cache_entry_t pages[MAX_PAGES];
     Py_ssize_t page_size;
 } proc_handle_t;
 
-static void
-_Py_RemoteDebug_FreePageCache(proc_handle_t *handle)
-{
-    for (int i = 0; i < MAX_PAGES; i++) {
-        PyMem_RawFree(handle->pages[i].data);
-        handle->pages[i].data = NULL;
-        handle->pages[i].valid = 0;
-    }
-}
-
-UNUSED static void
-_Py_RemoteDebug_ClearCache(proc_handle_t *handle)
-{
-    for (int i = 0; i < MAX_PAGES; i++) {
-        handle->pages[i].valid = 0;
-    }
-}
 
 #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX
 static mach_port_t pid_to_task(pid_t pid);
@@ -178,10 +152,6 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) {
     handle->memfd = -1;
 #endif
     handle->page_size = get_page_size();
-    for (int i = 0; i < MAX_PAGES; i++) {
-        handle->pages[i].data = NULL;
-        handle->pages[i].valid = 0;
-    }
     return 0;
 }
 
@@ -200,7 +170,6 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) {
     }
 #endif
     handle->pid = 0;
-    _Py_RemoteDebug_FreePageCache(handle);
 }
 
 #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX
@@ -1066,53 +1035,6 @@ _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle,
                                       size_t size,
                                       void *out)
 {
-    size_t page_size = handle->page_size;
-    uintptr_t page_base = addr & ~(page_size - 1);
-    size_t offset_in_page = addr - page_base;
-
-    if (offset_in_page + size > page_size) {
-        return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out);
-    }
-
-    // Search for valid cached page
-    for (int i = 0; i < MAX_PAGES; i++) {
-        page_cache_entry_t *entry = &handle->pages[i];
-        if (entry->valid && entry->page_addr == page_base) {
-            memcpy(out, entry->data + offset_in_page, size);
-            return 0;
-        }
-    }
-
-    // Find reusable slot
-    for (int i = 0; i < MAX_PAGES; i++) {
-        page_cache_entry_t *entry = &handle->pages[i];
-        if (!entry->valid) {
-            if (entry->data == NULL) {
-                entry->data = PyMem_RawMalloc(page_size);
-                if (entry->data == NULL) {
-                    _set_debug_exception_cause(PyExc_MemoryError,
-                        "Cannot allocate %zu bytes for page cache entry "
-                        "during read from PID %d at address 0x%lx",
-                        page_size, handle->pid, addr);
-                    return -1;
-                }
-            }
-
-            if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size, entry->data) < 0) {
-                // Try to just copy the exact ammount as a fallback
-                PyErr_Clear();
-                goto fallback;
-            }
-
-            entry->page_addr = page_base;
-            entry->valid = 1;
-            memcpy(out, entry->data + offset_in_page, size);
-            return 0;
-        }
-    }
-
-fallback:
-    // Cache full — fallback to uncached read
     return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out);
 }
 
index 62182194c1ab2a6034f735501b6b9778c56528f4..0ac7ac4d385792585ce948da7fe6aad8449b37db 100644 (file)
@@ -174,6 +174,7 @@ def benchmark(unwinder, duration_seconds=10):
     total_work_time = 0.0
     start_time = time.perf_counter()
     end_time = start_time + duration_seconds
+    total_attempts = 0
 
     colors = get_colors(can_colorize())
 
@@ -183,6 +184,7 @@ def benchmark(unwinder, duration_seconds=10):
 
     try:
         while time.perf_counter() < end_time:
+            total_attempts += 1
             work_start = time.perf_counter()
             try:
                 stack_trace = unwinder.get_stack_trace()
@@ -194,7 +196,6 @@ def benchmark(unwinder, duration_seconds=10):
             work_end = time.perf_counter()
             total_work_time += work_end - work_start
 
-            total_attempts = sample_count + fail_count
             if total_attempts % 10000 == 0:
                 avg_work_time_us = (total_work_time / total_attempts) * 1e6
                 work_rate = (
@@ -221,7 +222,6 @@ def benchmark(unwinder, duration_seconds=10):
 
     actual_end_time = time.perf_counter()
     wall_time = actual_end_time - start_time
-    total_attempts = sample_count + fail_count
 
     # Return final statistics
     return {
@@ -346,6 +346,13 @@ Available code examples:
         help="Code example to benchmark (default: basic)",
     )
 
+    parser.add_argument(
+        "--threads",
+        choices=["all", "main", "only_active"],
+        default="all",
+        help="Which threads to include in the benchmark (default: all)",
+    )
+
     return parser.parse_args()
 
 
@@ -419,8 +426,15 @@ def main():
                 # Create unwinder and run benchmark
                 print(f"{colors.BLUE}Initializing unwinder...{colors.RESET}")
                 try:
+                    kwargs = {}
+                    if args.threads == "all":
+                        kwargs["all_threads"] = True
+                    elif args.threads == "main":
+                        kwargs["all_threads"] = False
+                    elif args.threads == "only_active":
+                        kwargs["only_active_thread"] = True
                     unwinder = _remote_debugging.RemoteUnwinder(
-                        process.pid, all_threads=True
+                        process.pid, **kwargs
                     )
                     results = benchmark(unwinder, duration_seconds=args.duration)
                 finally: