+/******************************************************************************
+ * Python Remote Debugging Module
+ *
+ * This module provides functionality to debug Python processes remotely by
+ * reading their memory and reconstructing stack traces and asyncio task states.
+ ******************************************************************************/
+
#define _GNU_SOURCE
+/* ============================================================================
+ * HEADERS AND INCLUDES
+ * ============================================================================ */
+
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
# define HAVE_PROCESS_VM_READV 0
#endif
+/* ============================================================================
+ * TYPE DEFINITIONS AND STRUCTURES
+ * ============================================================================ */
+
+#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset)))
+
+/* Size macros for opaque buffers */
+#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject)
+#define SIZEOF_CODE_OBJ sizeof(PyCodeObject)
+#define SIZEOF_GEN_OBJ sizeof(PyGenObject)
+#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame)
+#define SIZEOF_LLIST_NODE sizeof(struct llist_node)
+#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t)
+#define SIZEOF_PYOBJECT sizeof(PyObject)
+#define SIZEOF_SET_OBJ sizeof(PySetObject)
+#define SIZEOF_TASK_OBJ 4096
+#define SIZEOF_THREAD_STATE sizeof(PyThreadState)
+#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
+#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
+#define SIZEOF_LONG_OBJ sizeof(PyLongObject)
+
+// Calculate the minimum buffer size needed to read interpreter state fields
+// We need to read code_object_generation and potentially tlbc_generation
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifdef Py_GIL_DISABLED
+#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+ offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
+ offsetof(PyInterpreterState, threads.head) + sizeof(void*))
+#else
+#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+ offsetof(PyInterpreterState, threads.head) + sizeof(void*))
+#endif
+#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
+
+
+
+// Copied from Modules/_asynciomodule.c because it's not exported
+
struct _Py_AsyncioModuleDebugOffsets {
struct _asyncio_task_object {
uint64_t size;
} asyncio_thread_state;
};
+typedef struct {
+ PyObject_HEAD
+ proc_handle_t handle;
+ uintptr_t runtime_start_address;
+ struct _Py_DebugOffsets debug_offsets;
+ int async_debug_offsets_available;
+ struct _Py_AsyncioModuleDebugOffsets async_debug_offsets;
+ uintptr_t interpreter_addr;
+ uintptr_t tstate_addr;
+ uint64_t code_object_generation;
+ _Py_hashtable_t *code_object_cache;
+#ifdef Py_GIL_DISABLED
+ // TLBC cache invalidation tracking
+ uint32_t tlbc_generation; // Track TLBC index pool changes
+ _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address
+#endif
+} RemoteUnwinderObject;
+
+typedef struct {
+ PyObject *func_name;
+ PyObject *file_name;
+ int first_lineno;
+ PyObject *linetable; // bytes
+ uintptr_t addr_code_adaptive;
+} CachedCodeMetadata;
+
+typedef struct {
+ /* Types */
+ PyTypeObject *RemoteDebugging_Type;
+} RemoteDebuggingState;
+
+typedef struct
+{
+ int lineno;
+ int end_lineno;
+ int column;
+ int end_column;
+} LocationInfo;
+
+typedef struct {
+ uintptr_t remote_addr;
+ size_t size;
+ void *local_copy;
+} StackChunkInfo;
+
+typedef struct {
+ StackChunkInfo *chunks;
+ size_t count;
+} StackChunkList;
+
+#include "clinic/_remote_debugging_module.c.h"
+
+/*[clinic input]
+module _remote_debugging
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/
+
+
+/* ============================================================================
+ * FORWARD DECLARATIONS
+ * ============================================================================ */
+
+static int
+parse_tasks_in_set(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t set_addr,
+ PyObject *awaited_by,
+ int recurse_task
+);
+
+static int
+parse_task(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t task_address,
+ PyObject *render_to,
+ int recurse_task
+);
+
+static int
+parse_coro_chain(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t coro_address,
+ PyObject *render_to
+);
+
+/* Forward declarations for task parsing functions */
+static int parse_frame_object(
+ RemoteUnwinderObject *unwinder,
+ PyObject** result,
+ uintptr_t address,
+ uintptr_t* previous_frame
+);
+
+/* ============================================================================
+ * UTILITY FUNCTIONS AND HELPERS
+ * ============================================================================ */
+
+static void
+cached_code_metadata_destroy(void *ptr)
+{
+ CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr;
+ Py_DECREF(meta->func_name);
+ Py_DECREF(meta->file_name);
+ Py_DECREF(meta->linetable);
+ PyMem_RawFree(meta);
+}
+
+static inline RemoteDebuggingState *
+RemoteDebugging_GetState(PyObject *module)
+{
+ void *state = _PyModule_GetState(module);
+ assert(state != NULL);
+ return (RemoteDebuggingState *)state;
+}
+
+static inline int
+RemoteDebugging_InitState(RemoteDebuggingState *st)
+{
+ return 0;
+}
+
// Helper to chain exceptions and avoid repetitions
static void
chain_exceptions(PyObject *exception, const char *string)
_PyErr_ChainExceptions1(exc);
}
-// Get the PyAsyncioDebug section address for any platform
-static uintptr_t
-_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
-{
- uintptr_t address;
-
-#ifdef MS_WINDOWS
- // On Windows, search for asyncio debug in executable or DLL
- address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio");
-#elif defined(__linux__)
- // On Linux, search for asyncio debug in executable or DLL
- address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
-#elif defined(__APPLE__) && TARGET_OS_OSX
- // On macOS, try libpython first, then fall back to python
- address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
- if (address == 0) {
- PyErr_Clear();
- address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
- }
-#else
- Py_UNREACHABLE();
-#endif
-
- return address;
-}
+/* ============================================================================
+ * MEMORY READING FUNCTIONS
+ * ============================================================================ */
static inline int
read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr)
{
- int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr);
+ int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(void*), ptr_addr);
if (result < 0) {
return -1;
}
static inline int
read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size)
{
- int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size);
+ int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size);
if (result < 0) {
return -1;
}
static int
read_char(proc_handle_t *handle, uintptr_t address, char *result)
{
- int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result);
- if (res < 0) {
- return -1;
- }
- return 0;
-}
-
-static int
-read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size)
-{
- int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result);
- if (res < 0) {
- return -1;
- }
- return 0;
-}
-
-static int
-read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result)
-{
- int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result);
+ int res = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(char), result);
if (res < 0) {
return -1;
}
return 0;
}
-static int
-read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr)
-{
- int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr);
- if (res < 0) {
- return -1;
- }
- return 0;
-}
+/* ============================================================================
+ * PYTHON OBJECT READING FUNCTIONS
+ * ============================================================================ */
static PyObject *
read_py_str(
- proc_handle_t *handle,
- _Py_DebugOffsets* debug_offsets,
+ RemoteUnwinderObject *unwinder,
uintptr_t address,
Py_ssize_t max_len
) {
PyObject *result = NULL;
char *buf = NULL;
- Py_ssize_t len;
- int res = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- address + debug_offsets->unicode_object.length,
- sizeof(Py_ssize_t),
- &len
+ // Read the entire PyUnicodeObject at once
+ char unicode_obj[SIZEOF_UNICODE_OBJ];
+ int res = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address,
+ SIZEOF_UNICODE_OBJ,
+ unicode_obj
);
if (res < 0) {
goto err;
}
+ Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length);
+ if (len < 0 || len > max_len) {
+ PyErr_Format(PyExc_RuntimeError,
+ "Invalid string length (%zd) at 0x%lx", len, address);
+ return NULL;
+ }
+
buf = (char *)PyMem_RawMalloc(len+1);
if (buf == NULL) {
PyErr_NoMemory();
return NULL;
}
- size_t offset = debug_offsets->unicode_object.asciiobject_size;
- res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf);
+ size_t offset = unwinder->debug_offsets.unicode_object.asciiobject_size;
+ res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf);
if (res < 0) {
goto err;
}
static PyObject *
read_py_bytes(
- proc_handle_t *handle,
- _Py_DebugOffsets* debug_offsets,
- uintptr_t address
+ RemoteUnwinderObject *unwinder,
+ uintptr_t address,
+ Py_ssize_t max_len
) {
PyObject *result = NULL;
char *buf = NULL;
- Py_ssize_t len;
- int res = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- address + debug_offsets->bytes_object.ob_size,
- sizeof(Py_ssize_t),
- &len
+ // Read the entire PyBytesObject at once
+ char bytes_obj[SIZEOF_BYTES_OBJ];
+ int res = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address,
+ SIZEOF_BYTES_OBJ,
+ bytes_obj
);
if (res < 0) {
goto err;
}
+ Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size);
+ if (len < 0 || len > max_len) {
+ PyErr_Format(PyExc_RuntimeError,
+ "Invalid string length (%zd) at 0x%lx", len, address);
+ return NULL;
+ }
+
buf = (char *)PyMem_RawMalloc(len+1);
if (buf == NULL) {
PyErr_NoMemory();
return NULL;
}
- size_t offset = debug_offsets->bytes_object.ob_sval;
- res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf);
+ size_t offset = unwinder->debug_offsets.bytes_object.ob_sval;
+ res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf);
if (res < 0) {
goto err;
}
return NULL;
}
-
-
static long
-read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address)
+read_py_long(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t address
+)
{
unsigned int shift = PYLONG_BITS_IN_DIGIT;
- Py_ssize_t size;
- uintptr_t lv_tag;
-
- int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle, address + offsets->long_object.lv_tag,
- sizeof(uintptr_t),
- &lv_tag);
+ // Read the entire PyLongObject at once
+ char long_obj[SIZEOF_LONG_OBJ];
+ int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address,
+ unwinder->debug_offsets.long_object.size,
+ long_obj);
if (bytes_read < 0) {
return -1;
}
+ uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag);
int negative = (lv_tag & 3) == 2;
- size = lv_tag >> 3;
+ Py_ssize_t size = lv_tag >> 3;
if (size == 0) {
return 0;
}
- digit *digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
- if (!digits) {
- PyErr_NoMemory();
- return -1;
- }
+ // If the long object has inline digits, use them directly
+ digit *digits;
+ if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
+ // For small integers, digits are inline in the long_value.ob_digit array
+ digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
+ if (!digits) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
+ } else {
+ // For larger integers, we need to read the digits separately
+ digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
+ if (!digits) {
+ PyErr_NoMemory();
+ return -1;
+ }
- bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- address + offsets->long_object.ob_digit,
- sizeof(digit) * size,
- digits
- );
- if (bytes_read < 0) {
- goto error;
+ bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address + unwinder->debug_offsets.long_object.ob_digit,
+ sizeof(digit) * size,
+ digits
+ );
+ if (bytes_read < 0) {
+ goto error;
+ }
}
long long value = 0;
return -1;
}
+/* ============================================================================
+ * ASYNCIO DEBUG FUNCTIONS
+ * ============================================================================ */
+
+// Get the PyAsyncioDebug section address for any platform
+static uintptr_t
+_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
+{
+ uintptr_t address;
+
+#ifdef MS_WINDOWS
+ // On Windows, search for asyncio debug in executable or DLL
+ address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio");
+ if (address == 0) {
+ // Error out: 'python' substring covers both executable and DLL
+ PyObject *exc = PyErr_GetRaisedException();
+ PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+ _PyErr_ChainExceptions1(exc);
+ }
+#elif defined(__linux__)
+ // On Linux, search for asyncio debug in executable or DLL
+ address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
+ if (address == 0) {
+ // Error out: 'python' substring covers both executable and DLL
+ PyObject *exc = PyErr_GetRaisedException();
+ PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+ _PyErr_ChainExceptions1(exc);
+ }
+#elif defined(__APPLE__) && TARGET_OS_OSX
+ // On macOS, try libpython first, then fall back to python
+ address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
+ if (address == 0) {
+ PyErr_Clear();
+ address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython");
+ }
+ if (address == 0) {
+ // Error out: 'python' substring covers both executable and DLL
+ PyObject *exc = PyErr_GetRaisedException();
+ PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+ _PyErr_ChainExceptions1(exc);
+ }
+#else
+ Py_UNREACHABLE();
+#endif
+
+ return address;
+}
+
+static int
+read_async_debug(
+ RemoteUnwinderObject *unwinder
+) {
+ uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle);
+ if (!async_debug_addr) {
+ return -1;
+ }
+
+ size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets);
+ int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets);
+ return result;
+}
+
+/* ============================================================================
+ * ASYNCIO TASK PARSING FUNCTIONS
+ * ============================================================================ */
+
static PyObject *
parse_task_name(
- proc_handle_t *handle,
- _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
+ RemoteUnwinderObject *unwinder,
uintptr_t task_address
) {
- uintptr_t task_name_addr;
- int err = read_py_ptr(
- handle,
- task_address + async_offsets->asyncio_task_object.task_name,
- &task_name_addr);
- if (err) {
+ // Read the entire TaskObj at once
+ char task_obj[SIZEOF_TASK_OBJ];
+ int err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ task_address,
+ unwinder->async_debug_offsets.asyncio_task_object.size,
+ task_obj);
+ if (err < 0) {
return NULL;
}
- // The task name can be a long or a string so we need to check the type
+ uintptr_t task_name_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name);
+ task_name_addr &= ~Py_TAG_BITS;
- PyObject task_name_obj;
- err = read_pyobj(
- handle,
+ // The task name can be a long or a string so we need to check the type
+ char task_name_obj[SIZEOF_PYOBJECT];
+ err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
task_name_addr,
- &task_name_obj);
- if (err) {
+ SIZEOF_PYOBJECT,
+ task_name_obj);
+ if (err < 0) {
return NULL;
}
- unsigned long flags;
- err = read_unsigned_long(
- handle,
- (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags,
- &flags);
- if (err) {
+ // Now read the type object to get the flags
+ char type_obj[SIZEOF_TYPE_OBJ];
+ err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type),
+ SIZEOF_TYPE_OBJ,
+ type_obj);
+ if (err < 0) {
return NULL;
}
- if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) {
- long res = read_py_long(handle, offsets, task_name_addr);
+ if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) {
+ long res = read_py_long(unwinder, task_name_addr);
if (res == -1) {
chain_exceptions(PyExc_RuntimeError, "Failed to get task name");
return NULL;
return PyUnicode_FromFormat("Task-%d", res);
}
- if(!(flags & Py_TPFLAGS_UNICODE_SUBCLASS)) {
+ if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) {
PyErr_SetString(PyExc_RuntimeError, "Invalid task name object");
return NULL;
}
return read_py_str(
- handle,
- offsets,
+ unwinder,
task_name_addr,
255
);
}
-static int
-parse_frame_object(
- proc_handle_t *handle,
- PyObject** result,
- struct _Py_DebugOffsets* offsets,
- uintptr_t address,
- uintptr_t* previous_frame
-);
-
-static int
-parse_coro_chain(
- proc_handle_t *handle,
- struct _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
- uintptr_t coro_address,
- PyObject *render_to
+static int parse_task_awaited_by(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t task_address,
+ PyObject *awaited_by,
+ int recurse_task
) {
- assert((void*)coro_address != NULL);
-
- uintptr_t gen_type_addr;
- int err = read_ptr(
- handle,
- coro_address + offsets->pyobject.ob_type,
- &gen_type_addr);
- if (err) {
+ // Read the entire TaskObj at once
+ char task_obj[SIZEOF_TASK_OBJ];
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address,
+ unwinder->async_debug_offsets.asyncio_task_object.size,
+ task_obj) < 0) {
return -1;
}
- PyObject* name = NULL;
- uintptr_t prev_frame;
- if (parse_frame_object(
- handle,
- &name,
- offsets,
- coro_address + offsets->gen_object.gi_iframe,
- &prev_frame)
- < 0)
- {
- return -1;
- }
+ uintptr_t task_ab_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by);
+ task_ab_addr &= ~Py_TAG_BITS;
- if (PyList_Append(render_to, name)) {
- Py_DECREF(name);
- return -1;
+ if ((void*)task_ab_addr == NULL) {
+ return 0;
}
- Py_DECREF(name);
- int8_t gi_frame_state;
- err = read_sized_int(
- handle,
- coro_address + offsets->gen_object.gi_frame_state,
- &gi_frame_state,
- sizeof(int8_t)
- );
- if (err) {
- return -1;
- }
+ char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set);
- if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) {
- char owner;
- err = read_char(
- handle,
- coro_address + offsets->gen_object.gi_iframe +
- offsets->interpreter_frame.owner,
- &owner
- );
- if (err) {
+ if (awaited_by_is_a_set) {
+ if (parse_tasks_in_set(unwinder, task_ab_addr, awaited_by, recurse_task)) {
return -1;
}
- if (owner != FRAME_OWNED_BY_GENERATOR) {
- PyErr_SetString(
- PyExc_RuntimeError,
- "generator doesn't own its frame \\_o_/");
+ } else {
+ if (parse_task(unwinder, task_ab_addr, awaited_by, recurse_task)) {
return -1;
}
+ }
+
+ return 0;
+}
+
+static int
+handle_yield_from_frame(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t gi_iframe_addr,
+ uintptr_t gen_type_addr,
+ PyObject *render_to
+) {
+ // Read the entire interpreter frame at once
+ char iframe[SIZEOF_INTERP_FRAME];
+ int err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ gi_iframe_addr,
+ SIZEOF_INTERP_FRAME,
+ iframe);
+ if (err < 0) {
+ return -1;
+ }
+
+ if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) {
+ PyErr_SetString(
+ PyExc_RuntimeError,
+ "generator doesn't own its frame \\_o_/");
+ return -1;
+ }
+
+ uintptr_t stackpointer_addr = GET_MEMBER(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer);
+ stackpointer_addr &= ~Py_TAG_BITS;
- uintptr_t stackpointer_addr;
+ if ((void*)stackpointer_addr != NULL) {
+ uintptr_t gi_await_addr;
err = read_py_ptr(
- handle,
- coro_address + offsets->gen_object.gi_iframe +
- offsets->interpreter_frame.stackpointer,
- &stackpointer_addr);
+ &unwinder->handle,
+ stackpointer_addr - sizeof(void*),
+ &gi_await_addr);
if (err) {
return -1;
}
- if ((void*)stackpointer_addr != NULL) {
- uintptr_t gi_await_addr;
- err = read_py_ptr(
- handle,
- stackpointer_addr - sizeof(void*),
- &gi_await_addr);
+ if ((void*)gi_await_addr != NULL) {
+ uintptr_t gi_await_addr_type_addr;
+ err = read_ptr(
+ &unwinder->handle,
+ gi_await_addr + unwinder->debug_offsets.pyobject.ob_type,
+ &gi_await_addr_type_addr);
if (err) {
return -1;
}
- if ((void*)gi_await_addr != NULL) {
- uintptr_t gi_await_addr_type_addr;
- int err = read_ptr(
- handle,
- gi_await_addr + offsets->pyobject.ob_type,
- &gi_await_addr_type_addr);
+ if (gen_type_addr == gi_await_addr_type_addr) {
+ /* This needs an explanation. We always start with parsing
+ native coroutine / generator frames. Ultimately they
+ are awaiting on something. That something can be
+ a native coroutine frame or... an iterator.
+ If it's the latter -- we can't continue building
+ our chain. So the condition to bail out of this is
+ to do that when the type of the current coroutine
+ doesn't match the type of whatever it points to
+ in its cr_await.
+ */
+ err = parse_coro_chain(unwinder, gi_await_addr, render_to);
if (err) {
return -1;
}
-
- if (gen_type_addr == gi_await_addr_type_addr) {
- /* This needs an explanation. We always start with parsing
- native coroutine / generator frames. Ultimately they
- are awaiting on something. That something can be
- a native coroutine frame or... an iterator.
- If it's the latter -- we can't continue building
- our chain. So the condition to bail out of this is
- to do that when the type of the current coroutine
- doesn't match the type of whatever it points to
- in its cr_await.
- */
- err = parse_coro_chain(
- handle,
- offsets,
- async_offsets,
- gi_await_addr,
- render_to
- );
- if (err) {
- return -1;
- }
- }
}
}
-
}
return 0;
}
-
static int
-parse_task_awaited_by(
- proc_handle_t *handle,
- struct _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
- uintptr_t task_address,
- PyObject *awaited_by,
- int recurse_task
-);
+parse_coro_chain(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t coro_address,
+ PyObject *render_to
+) {
+ assert((void*)coro_address != NULL);
+
+ // Read the entire generator object at once
+ char gen_object[SIZEOF_GEN_OBJ];
+ int err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ coro_address,
+ SIZEOF_GEN_OBJ,
+ gen_object);
+ if (err < 0) {
+ return -1;
+ }
+ uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type);
-static int
-parse_task(
- proc_handle_t *handle,
- struct _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
+ PyObject* name = NULL;
+
+ // Parse the previous frame using the gi_iframe from local copy
+ uintptr_t prev_frame;
+ uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe;
+ if (parse_frame_object(unwinder, &name, gi_iframe_addr, &prev_frame) < 0) {
+ return -1;
+ }
+
+ if (PyList_Append(render_to, name)) {
+ Py_DECREF(name);
+ return -1;
+ }
+ Py_DECREF(name);
+
+ if (GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state) == FRAME_SUSPENDED_YIELD_FROM) {
+ return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to);
+ }
+
+ return 0;
+}
+
+static PyObject*
+create_task_result(
+ RemoteUnwinderObject *unwinder,
uintptr_t task_address,
- PyObject *render_to,
int recurse_task
) {
- char is_task;
- int err = read_char(
- handle,
- task_address + async_offsets->asyncio_task_object.task_is_task,
- &is_task);
- if (err) {
- return -1;
- }
+ PyObject* result = NULL;
+ PyObject *call_stack = NULL;
+ PyObject *tn = NULL;
+ char task_obj[SIZEOF_TASK_OBJ];
+ uintptr_t coro_addr;
- PyObject* result = PyList_New(0);
+ result = PyList_New(0);
if (result == NULL) {
- return -1;
+ goto error;
}
- PyObject *call_stack = PyList_New(0);
+ call_stack = PyList_New(0);
if (call_stack == NULL) {
- goto err;
+ goto error;
}
+
if (PyList_Append(result, call_stack)) {
- Py_DECREF(call_stack);
- goto err;
+ goto error;
}
- /* we can operate on a borrowed one to simplify cleanup */
- Py_DECREF(call_stack);
+ Py_CLEAR(call_stack);
- if (is_task) {
- PyObject *tn = NULL;
- if (recurse_task) {
- tn = parse_task_name(
- handle, offsets, async_offsets, task_address);
- } else {
- tn = PyLong_FromUnsignedLongLong(task_address);
+ if (recurse_task) {
+ tn = parse_task_name(unwinder, task_address);
+ } else {
+ tn = PyLong_FromUnsignedLongLong(task_address);
+ }
+ if (tn == NULL) {
+ goto error;
+ }
+
+ if (PyList_Append(result, tn)) {
+ goto error;
+ }
+ Py_CLEAR(tn);
+
+ // Parse coroutine chain
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address,
+ unwinder->async_debug_offsets.asyncio_task_object.size,
+ task_obj) < 0) {
+ goto error;
+ }
+
+ coro_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro);
+ coro_addr &= ~Py_TAG_BITS;
+
+ if ((void*)coro_addr != NULL) {
+ call_stack = PyList_New(0);
+ if (call_stack == NULL) {
+ goto error;
}
- if (tn == NULL) {
- goto err;
+
+ if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) {
+ Py_DECREF(call_stack);
+ goto error;
}
- if (PyList_Append(result, tn)) {
- Py_DECREF(tn);
- goto err;
+
+ if (PyList_Reverse(call_stack)) {
+ Py_DECREF(call_stack);
+ goto error;
}
- Py_DECREF(tn);
- uintptr_t coro_addr;
- err = read_py_ptr(
- handle,
- task_address + async_offsets->asyncio_task_object.task_coro,
- &coro_addr);
- if (err) {
- goto err;
+ if (PyList_SetItem(result, 0, call_stack) < 0) {
+ Py_DECREF(call_stack);
+ goto error;
}
+ }
- if ((void*)coro_addr != NULL) {
- err = parse_coro_chain(
- handle,
- offsets,
- async_offsets,
- coro_addr,
- call_stack
- );
- if (err) {
- goto err;
- }
+ return result;
- if (PyList_Reverse(call_stack)) {
- goto err;
- }
+error:
+ Py_XDECREF(result);
+ Py_XDECREF(call_stack);
+ Py_XDECREF(tn);
+ return NULL;
+}
+
+static int
+parse_task(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t task_address,
+ PyObject *render_to,
+ int recurse_task
+) {
+ char is_task;
+ PyObject* result = NULL;
+ PyObject* awaited_by = NULL;
+ int err;
+
+ err = read_char(
+ &unwinder->handle,
+ task_address + unwinder->async_debug_offsets.asyncio_task_object.task_is_task,
+ &is_task);
+ if (err) {
+ goto error;
+ }
+
+ if (is_task) {
+ result = create_task_result(unwinder, task_address, recurse_task);
+ if (!result) {
+ goto error;
+ }
+ } else {
+ result = PyList_New(0);
+ if (result == NULL) {
+ goto error;
}
}
if (PyList_Append(render_to, result)) {
- goto err;
+ goto error;
}
if (recurse_task) {
- PyObject *awaited_by = PyList_New(0);
+ awaited_by = PyList_New(0);
if (awaited_by == NULL) {
- goto err;
+ goto error;
}
+
if (PyList_Append(result, awaited_by)) {
- Py_DECREF(awaited_by);
- goto err;
+ goto error;
}
- /* we can operate on a borrowed one to simplify cleanup */
Py_DECREF(awaited_by);
- if (parse_task_awaited_by(handle, offsets, async_offsets,
- task_address, awaited_by, 1)
- ) {
- goto err;
+ /* awaited_by is borrowed from 'result' to simplify cleanup */
+ if (parse_task_awaited_by(unwinder, task_address, awaited_by, 1) < 0) {
+ // Clear the pointer so the cleanup doesn't try to decref it since
+ // it's borrowed from 'result' and will be decrefed when result is
+ // deleted.
+ awaited_by = NULL;
+ goto error;
}
}
Py_DECREF(result);
return 0;
-err:
- Py_DECREF(result);
+error:
+ Py_XDECREF(result);
+ Py_XDECREF(awaited_by);
return -1;
}
static int
-parse_tasks_in_set(
- proc_handle_t *handle,
- struct _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
- uintptr_t set_addr,
+process_set_entry(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t table_ptr,
PyObject *awaited_by,
int recurse_task
) {
- uintptr_t set_obj;
- if (read_py_ptr(
- handle,
- set_addr,
- &set_obj)
- ) {
+ uintptr_t key_addr;
+ if (read_py_ptr(&unwinder->handle, table_ptr, &key_addr)) {
return -1;
}
- Py_ssize_t num_els;
- if (read_Py_ssize_t(
- handle,
- set_obj + offsets->set_object.used,
- &num_els)
- ) {
- return -1;
- }
+ if ((void*)key_addr != NULL) {
+ Py_ssize_t ref_cnt;
+ if (read_Py_ssize_t(&unwinder->handle, table_ptr, &ref_cnt)) {
+ return -1;
+ }
- Py_ssize_t set_len;
- if (read_Py_ssize_t(
- handle,
- set_obj + offsets->set_object.mask,
- &set_len)
- ) {
- return -1;
+ if (ref_cnt) {
+ // if 'ref_cnt=0' it's a set dummy marker
+ if (parse_task(unwinder, key_addr, awaited_by, recurse_task)) {
+ return -1;
+ }
+ return 1; // Successfully processed a valid entry
+ }
}
- set_len++; // The set contains the `mask+1` element slots.
+ return 0; // Entry was NULL or dummy marker
+}
- uintptr_t table_ptr;
- if (read_ptr(
- handle,
- set_obj + offsets->set_object.table,
- &table_ptr)
- ) {
+static int
+parse_tasks_in_set(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t set_addr,
+ PyObject *awaited_by,
+ int recurse_task
+) {
+ char set_object[SIZEOF_SET_OBJ];
+ int err = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ set_addr,
+ SIZEOF_SET_OBJ,
+ set_object);
+ if (err < 0) {
return -1;
}
+ Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used);
+ Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; // The set contains the `mask+1` element slots.
+ uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table);
+
Py_ssize_t i = 0;
Py_ssize_t els = 0;
- while (i < set_len) {
- uintptr_t key_addr;
- if (read_py_ptr(handle, table_ptr, &key_addr)) {
+ while (i < set_len && els < num_els) {
+ int result = process_set_entry(unwinder, table_ptr, awaited_by, recurse_task);
+
+ if (result < 0) {
return -1;
}
-
- if ((void*)key_addr != NULL) {
- Py_ssize_t ref_cnt;
- if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) {
- return -1;
- }
-
- if (ref_cnt) {
- // if 'ref_cnt=0' it's a set dummy marker
-
- if (parse_task(
- handle,
- offsets,
- async_offsets,
- key_addr,
- awaited_by,
- recurse_task
- )
- ) {
- return -1;
- }
-
- if (++els == num_els) {
- break;
- }
- }
+ if (result > 0) {
+ els++;
}
table_ptr += sizeof(void*) * 2;
static int
-parse_task_awaited_by(
- proc_handle_t *handle,
- struct _Py_DebugOffsets* offsets,
- struct _Py_AsyncioModuleDebugOffsets* async_offsets,
- uintptr_t task_address,
- PyObject *awaited_by,
- int recurse_task
+setup_async_result_structure(PyObject **result, PyObject **calls)
+{
+ *result = PyList_New(1);
+ if (*result == NULL) {
+ return -1;
+ }
+
+ *calls = PyList_New(0);
+ if (*calls == NULL) {
+ Py_DECREF(*result);
+ *result = NULL;
+ return -1;
+ }
+
+ if (PyList_SetItem(*result, 0, *calls)) { /* steals ref to 'calls' */
+ Py_DECREF(*calls);
+ Py_DECREF(*result);
+ *result = NULL;
+ *calls = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+add_task_info_to_result(
+ RemoteUnwinderObject *self,
+ PyObject *result,
+ uintptr_t running_task_addr
) {
- uintptr_t task_ab_addr;
- int err = read_py_ptr(
- handle,
- task_address + async_offsets->asyncio_task_object.task_awaited_by,
- &task_ab_addr);
- if (err) {
+ PyObject *tn = parse_task_name(self, running_task_addr);
+ if (tn == NULL) {
return -1;
}
- if ((void*)task_ab_addr == NULL) {
- return 0;
+ if (PyList_Append(result, tn)) {
+ Py_DECREF(tn);
+ return -1;
}
+ Py_DECREF(tn);
- char awaited_by_is_a_set;
- err = read_char(
- handle,
- task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set,
- &awaited_by_is_a_set);
- if (err) {
+ PyObject* awaited_by = PyList_New(0);
+ if (awaited_by == NULL) {
return -1;
}
- if (awaited_by_is_a_set) {
- if (parse_tasks_in_set(
- handle,
- offsets,
- async_offsets,
- task_address + async_offsets->asyncio_task_object.task_awaited_by,
- awaited_by,
- recurse_task
- )
- ) {
- return -1;
- }
- } else {
- uintptr_t sub_task;
- if (read_py_ptr(
- handle,
- task_address + async_offsets->asyncio_task_object.task_awaited_by,
- &sub_task)
- ) {
- return -1;
- }
+ if (PyList_Append(result, awaited_by)) {
+ Py_DECREF(awaited_by);
+ return -1;
+ }
+ Py_DECREF(awaited_by);
- if (parse_task(
- handle,
- offsets,
- async_offsets,
- sub_task,
- awaited_by,
- recurse_task
- )
- ) {
- return -1;
- }
+ if (parse_task_awaited_by(
+ self, running_task_addr, awaited_by, 1) < 0) {
+ return -1;
}
return 0;
}
-typedef struct
+static int
+process_single_task_node(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t task_addr,
+ PyObject *result
+) {
+ PyObject *tn = NULL;
+ PyObject *current_awaited_by = NULL;
+ PyObject *task_id = NULL;
+ PyObject *result_item = NULL;
+
+ tn = parse_task_name(unwinder, task_addr);
+ if (tn == NULL) {
+ goto error;
+ }
+
+ current_awaited_by = PyList_New(0);
+ if (current_awaited_by == NULL) {
+ goto error;
+ }
+
+ task_id = PyLong_FromUnsignedLongLong(task_addr);
+ if (task_id == NULL) {
+ goto error;
+ }
+
+ result_item = PyTuple_New(3);
+ if (result_item == NULL) {
+ goto error;
+ }
+
+ PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref
+ PyTuple_SET_ITEM(result_item, 1, tn); // steals ref
+ PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref
+
+ // References transferred to tuple
+ task_id = NULL;
+ tn = NULL;
+ current_awaited_by = NULL;
+
+ if (PyList_Append(result, result_item)) {
+ Py_DECREF(result_item);
+ return -1;
+ }
+ Py_DECREF(result_item);
+
+ // Get back current_awaited_by reference for parse_task_awaited_by
+ current_awaited_by = PyTuple_GET_ITEM(result_item, 2);
+ if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) {
+ return -1;
+ }
+
+ return 0;
+
+error:
+ Py_XDECREF(tn);
+ Py_XDECREF(current_awaited_by);
+ Py_XDECREF(task_id);
+ Py_XDECREF(result_item);
+ return -1;
+}
+
+/* ============================================================================
+ * TLBC CACHING FUNCTIONS
+ * ============================================================================ */
+
+#ifdef Py_GIL_DISABLED
+
+typedef struct {
+ void *tlbc_array; // Local copy of the TLBC array
+ Py_ssize_t tlbc_array_size; // Size of the TLBC array
+ uint32_t generation; // Generation when this was cached
+} TLBCCacheEntry;
+
+static void
+tlbc_cache_entry_destroy(void *ptr)
{
- int lineno;
- int end_lineno;
- int column;
- int end_column;
-} LocationInfo;
+ TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr;
+ if (entry->tlbc_array) {
+ PyMem_RawFree(entry->tlbc_array);
+ }
+ PyMem_RawFree(entry);
+}
+
+static TLBCCacheEntry *
+get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation)
+{
+ void *key = (void *)code_addr;
+ TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key);
+
+ if (entry && entry->generation != current_generation) {
+ // Entry is stale, remove it by setting to NULL
+ _Py_hashtable_set(self->tlbc_cache, key, NULL);
+ entry = NULL;
+ }
+
+ return entry;
+}
+
+static int
+cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation)
+{
+ uintptr_t tlbc_array_ptr;
+ void *tlbc_array = NULL;
+ TLBCCacheEntry *entry = NULL;
+
+ // Read the TLBC array pointer
+ if (read_ptr(&self->handle, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) {
+ return 0; // No TLBC array
+ }
+
+ // Read the TLBC array size
+ Py_ssize_t tlbc_size;
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) {
+ return 0; // Invalid size
+ }
+
+ // Allocate and read the entire TLBC array
+ size_t array_data_size = tlbc_size * sizeof(void*);
+ tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size);
+ if (!tlbc_array) {
+ return -1; // Memory error
+ }
+
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) {
+ PyMem_RawFree(tlbc_array);
+ return 0; // Read error
+ }
+
+ // Create cache entry
+ entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry));
+ if (!entry) {
+ PyMem_RawFree(tlbc_array);
+ return -1; // Memory error
+ }
+
+ entry->tlbc_array = tlbc_array;
+ entry->tlbc_array_size = tlbc_size;
+ entry->generation = generation;
+
+ // Store in cache
+ void *key = (void *)code_addr;
+ if (_Py_hashtable_set(self->tlbc_cache, key, entry) < 0) {
+ tlbc_cache_entry_destroy(entry);
+ return -1; // Cache error
+ }
+
+ return 1; // Success
+}
+
+
+
+#endif
+
+/* ============================================================================
+ * LINE TABLE PARSING FUNCTIONS
+ * ============================================================================ */
static int
scan_varint(const uint8_t **ptr)
}
}
-
static bool
parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
{
}
default: {
uint8_t second_byte = *(ptr++);
- assert((second_byte & 128) == 0);
+ if ((second_byte & 128) != 0) {
+ return false;
+ }
info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15);
break;
return false;
}
+/* ============================================================================
+ * CODE OBJECT AND FRAME PARSING FUNCTIONS
+ * ============================================================================ */
+
static int
-read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message)
+parse_code_object(RemoteUnwinderObject *unwinder,
+ PyObject **result,
+ uintptr_t address,
+ uintptr_t instruction_pointer,
+ uintptr_t *previous_frame,
+ int32_t tlbc_index)
{
- int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr);
- if (bytes_read < 0) {
- return -1;
+ void *key = (void *)address;
+ CachedCodeMetadata *meta = NULL;
+ PyObject *func = NULL;
+ PyObject *file = NULL;
+ PyObject *linetable = NULL;
+ PyObject *lineno = NULL;
+ PyObject *tuple = NULL;
+
+#ifdef Py_GIL_DISABLED
+ // In free threading builds, code object addresses might have the low bit set
+ // as a flag, so we need to mask it off to get the real address
+ uintptr_t real_address = address & (~1);
+#else
+ uintptr_t real_address = address;
+#endif
+
+ if (unwinder && unwinder->code_object_cache != NULL) {
+ meta = _Py_hashtable_get(unwinder->code_object_cache, key);
}
- if ((void *)(*out_ptr) == NULL) {
- PyErr_SetString(PyExc_RuntimeError, error_message);
- return -1;
+ if (meta == NULL) {
+ char code_object[SIZEOF_CODE_OBJ];
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0)
+ {
+ goto error;
+ }
+
+ func = read_py_str(unwinder,
+ GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024);
+ if (!func) {
+ goto error;
+ }
+
+ file = read_py_str(unwinder,
+ GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024);
+ if (!file) {
+ goto error;
+ }
+
+ linetable = read_py_bytes(unwinder,
+ GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096);
+ if (!linetable) {
+ goto error;
+ }
+
+ meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata));
+ if (!meta) {
+ goto error;
+ }
+
+ meta->func_name = func;
+ meta->file_name = file;
+ meta->linetable = linetable;
+ meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno);
+ meta->addr_code_adaptive = real_address + unwinder->debug_offsets.code_object.co_code_adaptive;
+
+ if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) {
+ cached_code_metadata_destroy(meta);
+ goto error;
+ }
+
+ // Ownership transferred to meta
+ func = NULL;
+ file = NULL;
+ linetable = NULL;
+ }
+
+ uintptr_t ip = instruction_pointer;
+ ptrdiff_t addrq;
+
+#ifdef Py_GIL_DISABLED
+ // Handle thread-local bytecode (TLBC) in free threading builds
+ if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
+ // No TLBC or no unwinder - use main bytecode directly
+ addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
+ goto done_tlbc;
+ }
+
+ // Try to get TLBC data from cache (we'll get generation from the caller)
+ TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
+
+ if (!tlbc_entry) {
+ // Cache miss - try to read and cache TLBC array
+ if (cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation) > 0) {
+ tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
+ }
+ }
+
+ if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) {
+ // Use cached TLBC data
+ uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
+ uintptr_t tlbc_bytecode_addr = entries[tlbc_index];
+
+ if (tlbc_bytecode_addr != 0) {
+ // Calculate offset from TLBC bytecode
+ addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr;
+ goto done_tlbc;
+ }
+ }
+
+ // Fall back to main bytecode
+ addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
+
+done_tlbc:
+#else
+ // Non-free-threaded build, always use the main bytecode
+ (void)tlbc_index; // Suppress unused parameter warning
+ (void)unwinder; // Suppress unused parameter warning
+ addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
+#endif
+ ; // Empty statement to avoid C23 extension warning
+ LocationInfo info = {0};
+ bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
+ meta->first_lineno, &info);
+ if (!ok) {
+ info.lineno = -1;
+ }
+
+ lineno = PyLong_FromLong(info.lineno);
+ if (!lineno) {
+ goto error;
+ }
+
+ tuple = PyTuple_New(3);
+ if (!tuple) {
+ goto error;
}
+ Py_INCREF(meta->func_name);
+ Py_INCREF(meta->file_name);
+ PyTuple_SET_ITEM(tuple, 0, meta->func_name);
+ PyTuple_SET_ITEM(tuple, 1, meta->file_name);
+ PyTuple_SET_ITEM(tuple, 2, lineno);
+
+ *result = tuple;
return 0;
+
+error:
+ Py_XDECREF(func);
+ Py_XDECREF(file);
+ Py_XDECREF(linetable);
+ Py_XDECREF(lineno);
+ Py_XDECREF(tuple);
+ return -1;
}
-static int
-read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets,
- uintptr_t current_frame, uintptr_t *instruction_ptr)
+/* ============================================================================
+ * STACK CHUNK MANAGEMENT FUNCTIONS
+ * ============================================================================ */
+
+static void
+cleanup_stack_chunks(StackChunkList *chunks)
{
- return read_remote_pointer(
- handle,
- current_frame + offsets->interpreter_frame.instr_ptr,
- instruction_ptr,
- "No instruction ptr found"
- );
+ for (size_t i = 0; i < chunks->count; ++i) {
+ PyMem_RawFree(chunks->chunks[i].local_copy);
+ }
+ PyMem_RawFree(chunks->chunks);
}
static int
-parse_code_object(proc_handle_t *handle,
- PyObject **result,
- struct _Py_DebugOffsets *offsets,
- uintptr_t address,
- uintptr_t current_frame,
- uintptr_t *previous_frame)
-{
- uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr;
+process_single_stack_chunk(
+ proc_handle_t *handle,
+ uintptr_t chunk_addr,
+ StackChunkInfo *chunk_info
+) {
+ // Start with default size assumption
+ size_t current_size = _PY_DATA_STACK_CHUNK_SIZE;
- if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 ||
- read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 ||
- read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 ||
- read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) {
+ char *this_chunk = PyMem_RawMalloc(current_size);
+ if (!this_chunk) {
+ PyErr_NoMemory();
return -1;
}
- int firstlineno;
- if (_Py_RemoteDebug_ReadRemoteMemory(handle,
- address + offsets->code_object.firstlineno,
- sizeof(int),
- &firstlineno) < 0) {
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, current_size, this_chunk) < 0) {
+ PyMem_RawFree(this_chunk);
return -1;
}
- PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable);
- if (!py_linetable) {
- return -1;
+ // Check actual size and reread if necessary
+ size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
+ if (actual_size != current_size) {
+ this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
+ if (!this_chunk) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, actual_size, this_chunk) < 0) {
+ PyMem_RawFree(this_chunk);
+ return -1;
+ }
+ current_size = actual_size;
}
- uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive;
- ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive;
+ chunk_info->remote_addr = chunk_addr;
+ chunk_info->size = current_size;
+ chunk_info->local_copy = this_chunk;
+ return 0;
+}
- LocationInfo info;
- parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info);
- Py_DECREF(py_linetable); // Done with linetable
+static int
+copy_stack_chunks(RemoteUnwinderObject *unwinder,
+ uintptr_t tstate_addr,
+ StackChunkList *out_chunks)
+{
+ uintptr_t chunk_addr;
+ StackChunkInfo *chunks = NULL;
+ size_t count = 0;
+ size_t max_chunks = 16;
- PyObject *py_line = PyLong_FromLong(info.lineno);
- if (!py_line) {
+ if (read_ptr(&unwinder->handle, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) {
return -1;
}
- PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256);
- if (!py_func_name) {
- Py_DECREF(py_line);
+ chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo));
+ if (!chunks) {
+ PyErr_NoMemory();
return -1;
}
- PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256);
- if (!py_file_name) {
- Py_DECREF(py_line);
- Py_DECREF(py_func_name);
- return -1;
+ while (chunk_addr != 0) {
+ // Grow array if needed
+ if (count >= max_chunks) {
+ max_chunks *= 2;
+ StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo));
+ if (!new_chunks) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ chunks = new_chunks;
+ }
+
+ // Process this chunk
+ if (process_single_stack_chunk(&unwinder->handle, chunk_addr, &chunks[count]) < 0) {
+ goto error;
+ }
+
+ // Get next chunk address and increment count
+ chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous));
+ count++;
}
- PyObject *result_tuple = PyTuple_New(3);
- if (!result_tuple) {
- Py_DECREF(py_line);
- Py_DECREF(py_func_name);
- Py_DECREF(py_file_name);
- return -1;
+ out_chunks->chunks = chunks;
+ out_chunks->count = count;
+ return 0;
+
+error:
+ for (size_t i = 0; i < count; ++i) {
+ PyMem_RawFree(chunks[i].local_copy);
}
+ PyMem_RawFree(chunks);
+ return -1;
+}
- PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref
- PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref
- PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref
+static void *
+find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
+{
+ for (size_t i = 0; i < chunks->count; ++i) {
+ uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
+ size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);
- *result = result_tuple;
- return 0;
+ if (remote_ptr >= base && remote_ptr < base + payload) {
+ return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr);
+ }
+ }
+ return NULL;
}
static int
-parse_frame_object(
- proc_handle_t *handle,
- PyObject** result,
- struct _Py_DebugOffsets* offsets,
+parse_frame_from_chunks(
+ RemoteUnwinderObject *unwinder,
+ PyObject **result,
uintptr_t address,
- uintptr_t* previous_frame
+ uintptr_t *previous_frame,
+ StackChunkList *chunks
) {
- int err;
-
- Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- address + offsets->interpreter_frame.previous,
- sizeof(void*),
- previous_frame
- );
- if (bytes_read < 0) {
+ void *frame_ptr = find_frame_in_chunks(chunks, address);
+ if (!frame_ptr) {
return -1;
}
- char owner;
- if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) {
- return -1;
- }
+ char *frame = (char *)frame_ptr;
+ *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
- if (owner >= FRAME_OWNED_BY_INTERPRETER) {
+ if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER ||
+ !GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable)) {
return 0;
}
- uintptr_t address_of_code_object;
- err = read_py_ptr(
- handle,
- address + offsets->interpreter_frame.executable,
- &address_of_code_object
- );
- if (err) {
- return -1;
- }
+ uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
- if ((void*)address_of_code_object == NULL) {
- return 0;
+ // Get tlbc_index for free threading builds
+ int32_t tlbc_index = 0;
+#ifdef Py_GIL_DISABLED
+ if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
+ tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
}
+#endif
return parse_code_object(
- handle, result, offsets, address_of_code_object, address, previous_frame);
+ unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable),
+ instruction_pointer, previous_frame, tlbc_index);
}
+/* ============================================================================
+ * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS
+ * ============================================================================ */
+
static int
-parse_async_frame_object(
- proc_handle_t *handle,
- PyObject** result,
- struct _Py_DebugOffsets* offsets,
- uintptr_t address,
- uintptr_t* previous_frame,
- uintptr_t* code_object
+populate_initial_state_data(
+ int all_threads,
+ RemoteUnwinderObject *unwinder,
+ uintptr_t runtime_start_address,
+ uintptr_t *interpreter_state,
+ uintptr_t *tstate
) {
- int err;
-
- Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- address + offsets->interpreter_frame.previous,
- sizeof(void*),
- previous_frame
- );
- if (bytes_read < 0) {
- return -1;
- }
+ uint64_t interpreter_state_list_head =
+ unwinder->debug_offsets.runtime_state.interpreters_head;
- char owner;
- bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner);
+ uintptr_t address_of_interpreter_state;
+ int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ runtime_start_address + interpreter_state_list_head,
+ sizeof(void*),
+ &address_of_interpreter_state);
if (bytes_read < 0) {
return -1;
}
- if (owner == FRAME_OWNED_BY_CSTACK || owner == FRAME_OWNED_BY_INTERPRETER) {
- return 0; // C frame
- }
-
- if (owner != FRAME_OWNED_BY_GENERATOR
- && owner != FRAME_OWNED_BY_THREAD) {
- PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
+ if (address_of_interpreter_state == 0) {
+ PyErr_SetString(PyExc_RuntimeError, "No interpreter state found");
return -1;
}
- err = read_py_ptr(
- handle,
- address + offsets->interpreter_frame.executable,
- code_object
- );
- if (err) {
- return -1;
- }
+ *interpreter_state = address_of_interpreter_state;
- assert(code_object != NULL);
- if ((void*)*code_object == NULL) {
+ if (all_threads) {
+ *tstate = 0;
return 0;
}
- if (parse_code_object(
- handle, result, offsets, *code_object, address, previous_frame)) {
- return -1;
- }
-
- return 1;
-}
+ uintptr_t address_of_thread = address_of_interpreter_state +
+ unwinder->debug_offsets.interpreter_state.threads_main;
-static int
-read_async_debug(
- proc_handle_t *handle,
- struct _Py_AsyncioModuleDebugOffsets* async_debug
-) {
- uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle);
- if (!async_debug_addr) {
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address_of_thread,
+ sizeof(void*),
+ tstate) < 0) {
return -1;
}
- size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets);
- int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug);
- return result;
+ return 0;
}
static int
find_running_frame(
- proc_handle_t *handle,
+ RemoteUnwinderObject *unwinder,
uintptr_t runtime_start_address,
- _Py_DebugOffsets* local_debug_offsets,
uintptr_t *frame
) {
uint64_t interpreter_state_list_head =
- local_debug_offsets->runtime_state.interpreters_head;
+ unwinder->debug_offsets.runtime_state.interpreters_head;
uintptr_t address_of_interpreter_state;
- int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
+ int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
runtime_start_address + interpreter_state_list_head,
sizeof(void*),
&address_of_interpreter_state);
}
uintptr_t address_of_thread;
- bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
+ bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
address_of_interpreter_state +
- local_debug_offsets->interpreter_state.threads_main,
+ unwinder->debug_offsets.interpreter_state.threads_main,
sizeof(void*),
&address_of_thread);
if (bytes_read < 0) {
// No Python frames are available for us (can happen at tear-down).
if ((void*)address_of_thread != NULL) {
int err = read_ptr(
- handle,
- address_of_thread + local_debug_offsets->thread_state.current_frame,
+ &unwinder->handle,
+ address_of_thread + unwinder->debug_offsets.thread_state.current_frame,
frame);
if (err) {
return -1;
static int
find_running_task(
- proc_handle_t *handle,
- uintptr_t runtime_start_address,
- _Py_DebugOffsets *local_debug_offsets,
- struct _Py_AsyncioModuleDebugOffsets *async_offsets,
+ RemoteUnwinderObject *unwinder,
uintptr_t *running_task_addr
) {
*running_task_addr = (uintptr_t)NULL;
uint64_t interpreter_state_list_head =
- local_debug_offsets->runtime_state.interpreters_head;
+ unwinder->debug_offsets.runtime_state.interpreters_head;
uintptr_t address_of_interpreter_state;
- int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- runtime_start_address + interpreter_state_list_head,
+ int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ unwinder->runtime_start_address + interpreter_state_list_head,
sizeof(void*),
&address_of_interpreter_state);
if (bytes_read < 0) {
}
uintptr_t address_of_thread;
- bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
- handle,
+ bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
address_of_interpreter_state +
- local_debug_offsets->interpreter_state.threads_head,
+ unwinder->debug_offsets.interpreter_state.threads_head,
sizeof(void*),
&address_of_thread);
if (bytes_read < 0) {
}
bytes_read = read_py_ptr(
- handle,
+ &unwinder->handle,
address_of_thread
- + async_offsets->asyncio_thread_state.asyncio_running_loop,
+ + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop,
&address_of_running_loop);
if (bytes_read == -1) {
return -1;
}
int err = read_ptr(
- handle,
+ &unwinder->handle,
address_of_thread
- + async_offsets->asyncio_thread_state.asyncio_running_task,
+ + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task,
running_task_addr);
if (err) {
return -1;
}
static int
-append_awaited_by_for_thread(
- proc_handle_t *handle,
- uintptr_t head_addr,
- struct _Py_DebugOffsets *debug_offsets,
- struct _Py_AsyncioModuleDebugOffsets *async_offsets,
- PyObject *result
+find_running_task_and_coro(
+ RemoteUnwinderObject *self,
+ uintptr_t *running_task_addr,
+ uintptr_t *running_coro_addr,
+ uintptr_t *running_task_code_obj
) {
- struct llist_node task_node;
-
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- head_addr,
- sizeof(task_node),
- &task_node))
- {
+ *running_task_addr = (uintptr_t)NULL;
+ if (find_running_task(
+ self, running_task_addr) < 0) {
+ chain_exceptions(PyExc_RuntimeError, "Failed to find running task");
return -1;
}
- size_t iteration_count = 0;
- const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound
- while ((uintptr_t)task_node.next != head_addr) {
- if (++iteration_count > MAX_ITERATIONS) {
- PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted");
- return -1;
- }
-
- if (task_node.next == NULL) {
- PyErr_SetString(
- PyExc_RuntimeError,
- "Invalid linked list structure reading remote memory");
- return -1;
- }
-
- uintptr_t task_addr = (uintptr_t)task_node.next
- - async_offsets->asyncio_task_object.task_node;
-
- PyObject *tn = parse_task_name(
- handle,
- debug_offsets,
- async_offsets,
- task_addr);
- if (tn == NULL) {
- return -1;
- }
-
- PyObject *current_awaited_by = PyList_New(0);
- if (current_awaited_by == NULL) {
- Py_DECREF(tn);
- return -1;
- }
-
- PyObject* task_id = PyLong_FromUnsignedLongLong(task_addr);
- if (task_id == NULL) {
- Py_DECREF(tn);
- Py_DECREF(current_awaited_by);
- return -1;
- }
-
- PyObject *result_item = PyTuple_New(3);
- if (result_item == NULL) {
- Py_DECREF(tn);
- Py_DECREF(current_awaited_by);
- Py_DECREF(task_id);
- return -1;
- }
-
- PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref
- PyTuple_SET_ITEM(result_item, 1, tn); // steals ref
- PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref
- if (PyList_Append(result, result_item)) {
- Py_DECREF(result_item);
- return -1;
- }
- Py_DECREF(result_item);
-
- if (parse_task_awaited_by(handle, debug_offsets, async_offsets,
- task_addr, current_awaited_by, 0))
- {
- return -1;
- }
-
- // onto the next one...
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- (uintptr_t)task_node.next,
- sizeof(task_node),
- &task_node))
- {
- return -1;
- }
- }
-
- return 0;
-}
-
-static int
-append_awaited_by(
- proc_handle_t *handle,
- unsigned long tid,
- uintptr_t head_addr,
- struct _Py_DebugOffsets *debug_offsets,
- struct _Py_AsyncioModuleDebugOffsets *async_offsets,
- PyObject *result)
-{
- PyObject *tid_py = PyLong_FromUnsignedLong(tid);
- if (tid_py == NULL) {
+ if ((void*)*running_task_addr == NULL) {
+ PyErr_SetString(PyExc_RuntimeError, "No running task found");
return -1;
}
- PyObject *result_item = PyTuple_New(2);
- if (result_item == NULL) {
- Py_DECREF(tid_py);
+ if (read_py_ptr(
+ &self->handle,
+ *running_task_addr + self->async_debug_offsets.asyncio_task_object.task_coro,
+ running_coro_addr) < 0) {
+ chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro");
return -1;
}
- PyObject* awaited_by_for_thread = PyList_New(0);
- if (awaited_by_for_thread == NULL) {
- Py_DECREF(tid_py);
- Py_DECREF(result_item);
+ if ((void*)*running_coro_addr == NULL) {
+ PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL");
return -1;
}
- PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref
- PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref
- if (PyList_Append(result, result_item)) {
- Py_DECREF(result_item);
+ // note: genobject's gi_iframe is an embedded struct so the address to
+ // the offset leads directly to its first field: f_executable
+ if (read_py_ptr(
+ &self->handle,
+ *running_coro_addr + self->debug_offsets.gen_object.gi_iframe,
+ running_task_code_obj) < 0) {
return -1;
}
- Py_DECREF(result_item);
- if (append_awaited_by_for_thread(
- handle,
- head_addr,
- debug_offsets,
- async_offsets,
- awaited_by_for_thread))
- {
+ if ((void*)*running_task_code_obj == NULL) {
+ PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL");
return -1;
}
return 0;
}
-static PyObject*
-get_all_awaited_by(PyObject* self, PyObject* args)
-{
-#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \
- (defined(__linux__) && !HAVE_PROCESS_VM_READV)
- PyErr_SetString(
- PyExc_RuntimeError,
- "get_all_awaited_by is not implemented on this platform");
- return NULL;
-#endif
- int pid;
- if (!PyArg_ParseTuple(args, "i", &pid)) {
- return NULL;
+/* ============================================================================
+ * FRAME PARSING FUNCTIONS
+ * ============================================================================ */
+
+static int
+parse_frame_object(
+ RemoteUnwinderObject *unwinder,
+ PyObject** result,
+ uintptr_t address,
+ uintptr_t* previous_frame
+) {
+ char frame[SIZEOF_INTERP_FRAME];
+
+ Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address,
+ SIZEOF_INTERP_FRAME,
+ frame
+ );
+ if (bytes_read < 0) {
+ return -1;
}
- proc_handle_t the_handle;
- proc_handle_t *handle = &the_handle;
- if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) {
+ *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
+
+ if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER) {
return 0;
}
- PyObject *result = NULL;
-
- uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle);
- if (runtime_start_addr == 0) {
- if (!PyErr_Occurred()) {
- PyErr_SetString(
- PyExc_RuntimeError, "Failed to get .PyRuntime address");
- }
- goto result_err;
+ if ((void*)GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable) == NULL) {
+ return 0;
}
- struct _Py_DebugOffsets local_debug_offsets;
- if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_addr, &local_debug_offsets)) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets");
- goto result_err;
+ uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
+
+ // Get tlbc_index for free threading builds
+ int32_t tlbc_index = 0;
+#ifdef Py_GIL_DISABLED
+ if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
+ tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
}
+#endif
- struct _Py_AsyncioModuleDebugOffsets local_async_debug;
- if (read_async_debug(handle, &local_async_debug)) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets");
- goto result_err;
+ return parse_code_object(
+ unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable),
+ instruction_pointer, previous_frame, tlbc_index);
+}
+
+static int
+parse_async_frame_object(
+ RemoteUnwinderObject *unwinder,
+ PyObject** result,
+ uintptr_t address,
+ uintptr_t* previous_frame,
+ uintptr_t* code_object
+) {
+ char frame[SIZEOF_INTERP_FRAME];
+
+ Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ address,
+ SIZEOF_INTERP_FRAME,
+ frame
+ );
+ if (bytes_read < 0) {
+ return -1;
}
- result = PyList_New(0);
- if (result == NULL) {
- goto result_err;
+ *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
+
+ if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK ||
+ GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
+ return 0; // C frame
}
- uint64_t interpreter_state_list_head =
- local_debug_offsets.runtime_state.interpreters_head;
+ if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR
+ && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) {
+ PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n",
+ GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner));
+ return -1;
+ }
- uintptr_t interpreter_state_addr;
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- runtime_start_addr + interpreter_state_list_head,
- sizeof(void*),
- &interpreter_state_addr))
- {
- goto result_err;
+ *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable);
+ // Strip tag bits for consistent comparison
+ *code_object &= ~Py_TAG_BITS;
+
+ assert(code_object != NULL);
+ if ((void*)*code_object == NULL) {
+ return 0;
}
- uintptr_t thread_state_addr;
- unsigned long tid = 0;
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- interpreter_state_addr
- + local_debug_offsets.interpreter_state.threads_head,
- sizeof(void*),
- &thread_state_addr))
- {
- goto result_err;
+ uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
+
+ // Get tlbc_index for free threading builds
+ int32_t tlbc_index = 0;
+#ifdef Py_GIL_DISABLED
+ if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
+ tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
}
+#endif
- uintptr_t head_addr;
- while (thread_state_addr != 0) {
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- thread_state_addr
- + local_debug_offsets.thread_state.native_thread_id,
- sizeof(tid),
- &tid))
- {
- goto result_err;
+ if (parse_code_object(
+ unwinder, result, *code_object, instruction_pointer, previous_frame, tlbc_index)) {
+ return -1;
+ }
+
+ return 1;
+}
+
+static int
+parse_async_frame_chain(
+ RemoteUnwinderObject *self,
+ PyObject *calls,
+ uintptr_t running_task_code_obj
+) {
+ uintptr_t address_of_current_frame;
+ if (find_running_frame(self, self->runtime_start_address, &address_of_current_frame) < 0) {
+ chain_exceptions(PyExc_RuntimeError, "Failed to find running frame");
+ return -1;
+ }
+
+ uintptr_t address_of_code_object;
+ while ((void*)address_of_current_frame != NULL) {
+ PyObject* frame_info = NULL;
+ int res = parse_async_frame_object(
+ self,
+ &frame_info,
+ address_of_current_frame,
+ &address_of_current_frame,
+ &address_of_code_object
+ );
+
+ if (res < 0) {
+ chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object");
+ return -1;
}
- head_addr = thread_state_addr
- + local_async_debug.asyncio_thread_state.asyncio_tasks_head;
+ if (!frame_info) {
+ continue;
+ }
- if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets,
- &local_async_debug, result))
- {
- goto result_err;
+ if (PyList_Append(calls, frame_info) == -1) {
+ Py_DECREF(frame_info);
+ return -1;
}
- if (0 > _Py_RemoteDebug_ReadRemoteMemory(
- handle,
- thread_state_addr + local_debug_offsets.thread_state.next,
- sizeof(void*),
- &thread_state_addr))
- {
- goto result_err;
+ Py_DECREF(frame_info);
+
+ if (address_of_code_object == running_task_code_obj) {
+ break;
}
}
- head_addr = interpreter_state_addr
- + local_async_debug.asyncio_interpreter_state.asyncio_tasks_head;
+ return 0;
+}
- // On top of a per-thread task lists used by default by asyncio to avoid
- // contention, there is also a fallback per-interpreter list of tasks;
- // any tasks still pending when a thread is destroyed will be moved to the
- // per-interpreter task list. It's unlikely we'll find anything here, but
- // interesting for debugging.
- if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets,
- &local_async_debug, result))
- {
- goto result_err;
+/* ============================================================================
+ * AWAITED BY PARSING FUNCTIONS
+ * ============================================================================ */
+
+static int
+append_awaited_by_for_thread(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t head_addr,
+ PyObject *result
+) {
+ char task_node[SIZEOF_LLIST_NODE];
+
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr,
+ sizeof(task_node), task_node) < 0) {
+ return -1;
}
- _Py_RemoteDebug_CleanupProcHandle(handle);
- return result;
+ size_t iteration_count = 0;
+ const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound
-result_err:
- Py_XDECREF(result);
- _Py_RemoteDebug_CleanupProcHandle(handle);
- return NULL;
-}
+ while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) {
+ if (++iteration_count > MAX_ITERATIONS) {
+ PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted");
+ return -1;
+ }
-static PyObject*
-get_stack_trace(PyObject* self, PyObject* args)
-{
-#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \
- (defined(__linux__) && !HAVE_PROCESS_VM_READV)
- PyErr_SetString(
- PyExc_RuntimeError,
- "get_stack_trace is not supported on this platform");
- return NULL;
-#endif
+ if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Invalid linked list structure reading remote memory");
+ return -1;
+ }
- int pid;
- if (!PyArg_ParseTuple(args, "i", &pid)) {
- return NULL;
- }
+ uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next)
+ - unwinder->async_debug_offsets.asyncio_task_object.task_node;
- proc_handle_t the_handle;
- proc_handle_t *handle = &the_handle;
- if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) {
- return 0;
+ if (process_single_task_node(unwinder, task_addr, result) < 0) {
+ return -1;
+ }
+
+ // Read next node
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle,
+ (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next),
+ sizeof(task_node),
+ task_node) < 0) {
+ return -1;
+ }
}
- PyObject* result = NULL;
+ return 0;
+}
- uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle);
- if (runtime_start_address == 0) {
- if (!PyErr_Occurred()) {
- PyErr_SetString(
- PyExc_RuntimeError, "Failed to get .PyRuntime address");
- }
- goto result_err;
+static int
+append_awaited_by(
+ RemoteUnwinderObject *unwinder,
+ unsigned long tid,
+ uintptr_t head_addr,
+ PyObject *result)
+{
+ PyObject *tid_py = PyLong_FromUnsignedLong(tid);
+ if (tid_py == NULL) {
+ return -1;
}
- struct _Py_DebugOffsets local_debug_offsets;
- if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets");
- goto result_err;
+ PyObject *result_item = PyTuple_New(2);
+ if (result_item == NULL) {
+ Py_DECREF(tid_py);
+ return -1;
}
- uintptr_t address_of_current_frame;
- if (find_running_frame(
- handle, runtime_start_address, &local_debug_offsets,
- &address_of_current_frame)
- ) {
- goto result_err;
+ PyObject* awaited_by_for_thread = PyList_New(0);
+ if (awaited_by_for_thread == NULL) {
+ Py_DECREF(tid_py);
+ Py_DECREF(result_item);
+ return -1;
}
- result = PyList_New(0);
- if (result == NULL) {
- goto result_err;
+ PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref
+ PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref
+ if (PyList_Append(result, result_item)) {
+ Py_DECREF(result_item);
+ return -1;
}
+ Py_DECREF(result_item);
- while ((void*)address_of_current_frame != NULL) {
- PyObject* frame_info = NULL;
- if (parse_frame_object(
- handle,
- &frame_info,
- &local_debug_offsets,
- address_of_current_frame,
- &address_of_current_frame)
- < 0)
- {
- Py_CLEAR(result);
- goto result_err;
+ if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread))
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+/* ============================================================================
+ * STACK UNWINDING FUNCTIONS
+ * ============================================================================ */
+
+static int
+process_frame_chain(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t initial_frame_addr,
+ StackChunkList *chunks,
+ PyObject *frame_info
+) {
+ uintptr_t frame_addr = initial_frame_addr;
+ uintptr_t prev_frame_addr = 0;
+ const size_t MAX_FRAMES = 1024;
+ size_t frame_count = 0;
+
+ while ((void*)frame_addr != NULL) {
+ PyObject *frame = NULL;
+ uintptr_t next_frame_addr = 0;
+
+ if (++frame_count > MAX_FRAMES) {
+ PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
+ return -1;
}
- if (!frame_info) {
- continue;
+ // Try chunks first, fallback to direct memory read
+ if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) {
+ PyErr_Clear();
+ if (parse_frame_object(unwinder, &frame, frame_addr, &next_frame_addr) < 0) {
+ return -1;
+ }
}
- if (PyList_Append(result, frame_info) == -1) {
- Py_CLEAR(result);
- goto result_err;
+ if (!frame) {
+ break;
}
- Py_DECREF(frame_info);
- frame_info = NULL;
+ if (prev_frame_addr && frame_addr != prev_frame_addr) {
+ PyErr_Format(PyExc_RuntimeError,
+ "Broken frame chain: expected frame at 0x%lx, got 0x%lx",
+ prev_frame_addr, frame_addr);
+ Py_DECREF(frame);
+ return -1;
+ }
+ if (PyList_Append(frame_info, frame) == -1) {
+ Py_DECREF(frame);
+ return -1;
+ }
+ Py_DECREF(frame);
+
+ prev_frame_addr = next_frame_addr;
+ frame_addr = next_frame_addr;
}
-result_err:
- _Py_RemoteDebug_CleanupProcHandle(handle);
- return result;
+ return 0;
}
static PyObject*
-get_async_stack_trace(PyObject* self, PyObject* args)
-{
-#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \
- (defined(__linux__) && !HAVE_PROCESS_VM_READV)
- PyErr_SetString(
- PyExc_RuntimeError,
- "get_stack_trace is not supported on this platform");
- return NULL;
-#endif
- int pid;
+unwind_stack_for_thread(
+ RemoteUnwinderObject *unwinder,
+ uintptr_t *current_tstate
+) {
+ PyObject *frame_info = NULL;
+ PyObject *thread_id = NULL;
+ PyObject *result = NULL;
+ StackChunkList chunks = {0};
- if (!PyArg_ParseTuple(args, "i", &pid)) {
- return NULL;
+ char ts[SIZEOF_THREAD_STATE];
+ int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
+ &unwinder->handle, *current_tstate, unwinder->debug_offsets.thread_state.size, ts);
+ if (bytes_read < 0) {
+ goto error;
}
- proc_handle_t the_handle;
- proc_handle_t *handle = &the_handle;
- if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) {
- return 0;
- }
+ uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame);
- PyObject *result = NULL;
+ frame_info = PyList_New(0);
+ if (!frame_info) {
+ goto error;
+ }
- uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle);
- if (runtime_start_address == 0) {
- if (!PyErr_Occurred()) {
- PyErr_SetString(
- PyExc_RuntimeError, "Failed to get .PyRuntime address");
- }
- goto result_err;
+ if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) {
+ goto error;
}
- struct _Py_DebugOffsets local_debug_offsets;
- if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets");
- goto result_err;
+ if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) {
+ goto error;
}
- struct _Py_AsyncioModuleDebugOffsets local_async_debug;
- if (read_async_debug(handle, &local_async_debug)) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets");
- goto result_err;
+ *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next);
+
+ thread_id = PyLong_FromLongLong(
+ GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id));
+ if (thread_id == NULL) {
+ goto error;
}
- result = PyList_New(1);
+ result = PyTuple_New(2);
if (result == NULL) {
- goto result_err;
+ goto error;
}
- PyObject* calls = PyList_New(0);
- if (calls == NULL) {
- goto result_err;
+
+ PyTuple_SET_ITEM(result, 0, thread_id); // Steals reference
+ PyTuple_SET_ITEM(result, 1, frame_info); // Steals reference
+
+ cleanup_stack_chunks(&chunks);
+ return result;
+
+error:
+ Py_XDECREF(frame_info);
+ Py_XDECREF(thread_id);
+ Py_XDECREF(result);
+ cleanup_stack_chunks(&chunks);
+ return NULL;
+}
+
+
+/* ============================================================================
+ * REMOTEUNWINDER CLASS IMPLEMENTATION
+ * ============================================================================ */
+
+/*[clinic input]
+class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/
+
+/*[clinic input]
+_remote_debugging.RemoteUnwinder.__init__
+ pid: int
+ *
+ all_threads: bool = False
+
+Initialize a new RemoteUnwinder object for debugging a remote Python process.
+
+Args:
+ pid: Process ID of the target Python process to debug
+ all_threads: If True, initialize state for all threads in the process.
+ If False, only initialize for the main thread.
+
+The RemoteUnwinder provides functionality to inspect and debug a running Python
+process, including examining thread states, stack frames and other runtime data.
+
+Raises:
+ PermissionError: If access to the target process is denied
+ OSError: If unable to attach to the target process or access its memory
+ RuntimeError: If unable to read debug information from the target process
+[clinic start generated code]*/
+
+static int
+_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
+ int pid, int all_threads)
+/*[clinic end generated code: output=b8027cb247092081 input=6a2056b04e6f050e]*/
+{
+ if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
+ return -1;
}
- if (PyList_SetItem(result, 0, calls)) { /* steals ref to 'calls' */
- Py_DECREF(calls);
- goto result_err;
+
+ self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle);
+ if (self->runtime_start_address == 0) {
+ return -1;
}
- uintptr_t running_task_addr = (uintptr_t)NULL;
- if (find_running_task(
- handle, runtime_start_address, &local_debug_offsets, &local_async_debug,
- &running_task_addr)
- ) {
- chain_exceptions(PyExc_RuntimeError, "Failed to find running task");
- goto result_err;
+ if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle,
+ &self->runtime_start_address,
+ &self->debug_offsets) < 0)
+ {
+ return -1;
}
- if ((void*)running_task_addr == NULL) {
- PyErr_SetString(PyExc_RuntimeError, "No running task found");
- goto result_err;
+ // Try to read async debug offsets, but don't fail if they're not available
+ self->async_debug_offsets_available = 1;
+ if (read_async_debug(self) < 0) {
+ PyErr_Clear();
+ memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets));
+ self->async_debug_offsets_available = 0;
}
- uintptr_t running_coro_addr;
- if (read_py_ptr(
- handle,
- running_task_addr + local_async_debug.asyncio_task_object.task_coro,
- &running_coro_addr
- )) {
- chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro");
- goto result_err;
+ if (populate_initial_state_data(all_threads, self, self->runtime_start_address,
+ &self->interpreter_addr ,&self->tstate_addr) < 0)
+ {
+ return -1;
}
- if ((void*)running_coro_addr == NULL) {
- PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL");
- goto result_err;
+ self->code_object_cache = _Py_hashtable_new_full(
+ _Py_hashtable_hash_ptr,
+ _Py_hashtable_compare_direct,
+ NULL, // keys are stable pointers, don't destroy
+ cached_code_metadata_destroy,
+ NULL
+ );
+ if (self->code_object_cache == NULL) {
+ PyErr_NoMemory();
+ return -1;
}
- // note: genobject's gi_iframe is an embedded struct so the address to
- // the offset leads directly to its first field: f_executable
- uintptr_t address_of_running_task_code_obj;
- if (read_py_ptr(
- handle,
- running_coro_addr + local_debug_offsets.gen_object.gi_iframe,
- &address_of_running_task_code_obj
- )) {
- goto result_err;
+#ifdef Py_GIL_DISABLED
+ // Initialize TLBC cache
+ self->tlbc_generation = 0;
+ self->tlbc_cache = _Py_hashtable_new_full(
+ _Py_hashtable_hash_ptr,
+ _Py_hashtable_compare_direct,
+ NULL, // keys are stable pointers, don't destroy
+ tlbc_cache_entry_destroy,
+ NULL
+ );
+ if (self->tlbc_cache == NULL) {
+ _Py_hashtable_destroy(self->code_object_cache);
+ PyErr_NoMemory();
+ return -1;
}
+#endif
- if ((void*)address_of_running_task_code_obj == NULL) {
- PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL");
- goto result_err;
+ return 0;
+}
+
+/*[clinic input]
+@critical_section
+_remote_debugging.RemoteUnwinder.get_stack_trace
+
+Returns a list of stack traces for all threads in the target process.
+
+Each element in the returned list is a tuple of (thread_id, frame_list), where:
+- thread_id is the OS thread identifier
+- frame_list is a list of tuples (function_name, filename, line_number) representing
+ the Python stack frames for that thread, ordered from most recent to oldest
+
+Example:
+ [
+ (1234, [
+ ('process_data', 'worker.py', 127),
+ ('run_worker', 'worker.py', 45),
+ ('main', 'app.py', 23)
+ ]),
+ (1235, [
+ ('handle_request', 'server.py', 89),
+ ('serve_forever', 'server.py', 52)
+ ])
+ ]
+
+Raises:
+ RuntimeError: If there is an error copying memory from the target process
+ OSError: If there is an error accessing the target process
+ PermissionError: If access to the target process is denied
+ UnicodeDecodeError: If there is an error decoding strings from the target process
+
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self)
+/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/
+{
+ PyObject* result = NULL;
+ // Read interpreter state into opaque buffer
+ char interp_state_buffer[INTERP_STATE_BUFFER_SIZE];
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(
+ &self->handle,
+ self->interpreter_addr,
+ INTERP_STATE_BUFFER_SIZE,
+ interp_state_buffer) < 0) {
+ goto exit;
}
- uintptr_t address_of_current_frame;
- if (find_running_frame(
- handle, runtime_start_address, &local_debug_offsets,
- &address_of_current_frame)
- ) {
- chain_exceptions(PyExc_RuntimeError, "Failed to find running frame");
- goto result_err;
+ // Get code object generation from buffer
+ uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer,
+ self->debug_offsets.interpreter_state.code_object_generation);
+
+ if (code_object_generation != self->code_object_generation) {
+ self->code_object_generation = code_object_generation;
+ _Py_hashtable_clear(self->code_object_cache);
}
- uintptr_t address_of_code_object;
- while ((void*)address_of_current_frame != NULL) {
- PyObject* frame_info = NULL;
- int res = parse_async_frame_object(
- handle,
- &frame_info,
- &local_debug_offsets,
- address_of_current_frame,
- &address_of_current_frame,
- &address_of_code_object
- );
+#ifdef Py_GIL_DISABLED
+ // Check TLBC generation and invalidate cache if needed
+ uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
+ self->debug_offsets.interpreter_state.tlbc_generation);
+ if (current_tlbc_generation != self->tlbc_generation) {
+ self->tlbc_generation = current_tlbc_generation;
+ _Py_hashtable_clear(self->tlbc_cache);
+ }
+#endif
- if (res < 0) {
- chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object");
- goto result_err;
- }
+ uintptr_t current_tstate;
+ if (self->tstate_addr == 0) {
+ // Get threads head from buffer
+ current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
+ self->debug_offsets.interpreter_state.threads_head);
+ } else {
+ current_tstate = self->tstate_addr;
+ }
+ result = PyList_New(0);
+ if (!result) {
+ goto exit;
+ }
+
+ while (current_tstate != 0) {
+ PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate);
if (!frame_info) {
- continue;
+ Py_CLEAR(result);
+ goto exit;
}
- if (PyList_Append(calls, frame_info) == -1) {
- Py_DECREF(calls);
- goto result_err;
+ if (PyList_Append(result, frame_info) == -1) {
+ Py_DECREF(frame_info);
+ Py_CLEAR(result);
+ goto exit;
}
-
Py_DECREF(frame_info);
- frame_info = NULL;
- if (address_of_code_object == address_of_running_task_code_obj) {
+ // We are targeting a single tstate, break here
+ if (self->tstate_addr) {
break;
}
}
- PyObject *tn = parse_task_name(
- handle, &local_debug_offsets, &local_async_debug, running_task_addr);
- if (tn == NULL) {
- goto result_err;
+exit:
+ _Py_RemoteDebug_ClearCache(&self->handle);
+ return result;
+}
+
+/*[clinic input]
+@critical_section
+_remote_debugging.RemoteUnwinder.get_all_awaited_by
+
+Get all tasks and their awaited_by relationships from the remote process.
+
+This provides a tree structure showing which tasks are waiting for other tasks.
+
+For each task, returns:
+1. The call stack frames leading to where the task is currently executing
+2. The name of the task
+3. A list of tasks that this task is waiting for, with their own frames/names/etc
+
+Returns a list of [frames, task_name, subtasks] where:
+- frames: List of (func_name, filename, lineno) showing the call stack
+- task_name: String identifier for the task
+- subtasks: List of tasks being awaited by this task, in same format
+
+Raises:
+ RuntimeError: If AsyncioDebug section is not available in the remote process
+ MemoryError: If memory allocation fails
+ OSError: If reading from the remote process fails
+
+Example output:
+[
+ # Task c2_root waiting for two subtasks
+ [
+ # Call stack of c2_root
+ [("c5", "script.py", 10), ("c4", "script.py", 14)],
+ "c2_root",
+ [
+ # First subtask (sub_main_2) and what it's waiting for
+ [
+ [("c1", "script.py", 23)],
+ "sub_main_2",
+ [...]
+ ],
+ # Second subtask and its waiters
+ [...]
+ ]
+ ]
+]
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self)
+/*[clinic end generated code: output=6a49cd345e8aec53 input=a452c652bb00701a]*/
+{
+ if (!self->async_debug_offsets_available) {
+ PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
+ return NULL;
}
- if (PyList_Append(result, tn)) {
- Py_DECREF(tn);
+
+ PyObject *result = PyList_New(0);
+ if (result == NULL) {
goto result_err;
}
- Py_DECREF(tn);
- PyObject* awaited_by = PyList_New(0);
- if (awaited_by == NULL) {
+ uintptr_t thread_state_addr;
+ unsigned long tid = 0;
+ if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
+ &self->handle,
+ self->interpreter_addr
+ + self->debug_offsets.interpreter_state.threads_main,
+ sizeof(void*),
+ &thread_state_addr))
+ {
goto result_err;
}
- if (PyList_Append(result, awaited_by)) {
- Py_DECREF(awaited_by);
- goto result_err;
+
+ uintptr_t head_addr;
+ while (thread_state_addr != 0) {
+ if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
+ &self->handle,
+ thread_state_addr
+ + self->debug_offsets.thread_state.native_thread_id,
+ sizeof(tid),
+ &tid))
+ {
+ goto result_err;
+ }
+
+ head_addr = thread_state_addr
+ + self->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head;
+
+ if (append_awaited_by(self, tid, head_addr, result))
+ {
+ goto result_err;
+ }
+
+ if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
+ &self->handle,
+ thread_state_addr + self->debug_offsets.thread_state.next,
+ sizeof(void*),
+ &thread_state_addr))
+ {
+ goto result_err;
+ }
}
- Py_DECREF(awaited_by);
- if (parse_task_awaited_by(
- handle, &local_debug_offsets, &local_async_debug,
- running_task_addr, awaited_by, 1)
- ) {
+ head_addr = self->interpreter_addr
+ + self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head;
+
+ // On top of a per-thread task lists used by default by asyncio to avoid
+ // contention, there is also a fallback per-interpreter list of tasks;
+ // any tasks still pending when a thread is destroyed will be moved to the
+ // per-interpreter task list. It's unlikely we'll find anything here, but
+ // interesting for debugging.
+ if (append_awaited_by(self, 0, head_addr, result))
+ {
goto result_err;
}
- _Py_RemoteDebug_CleanupProcHandle(handle);
+ _Py_RemoteDebug_ClearCache(&self->handle);
return result;
result_err:
- _Py_RemoteDebug_CleanupProcHandle(handle);
+ _Py_RemoteDebug_ClearCache(&self->handle);
Py_XDECREF(result);
return NULL;
}
+/*[clinic input]
+@critical_section
+_remote_debugging.RemoteUnwinder.get_async_stack_trace
-static PyMethodDef methods[] = {
- {"get_stack_trace", get_stack_trace, METH_VARARGS,
- "Get the Python stack from a given pid"},
- {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS,
- "Get the asyncio stack from a given pid"},
- {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS,
- "Get all tasks and their awaited_by from a given pid"},
- {NULL, NULL, 0, NULL},
+Returns information about the currently running async task and its stack trace.
+
+Returns a tuple of (task_info, stack_frames) where:
+- task_info is a tuple of (task_id, task_name) identifying the task
+- stack_frames is a list of tuples (function_name, filename, line_number) representing
+ the Python stack frames for the task, ordered from most recent to oldest
+
+Example:
+ ((4345585712, 'Task-1'), [
+ ('run_echo_server', 'server.py', 127),
+ ('serve_forever', 'server.py', 45),
+ ('main', 'app.py', 23)
+ ])
+
+Raises:
+ RuntimeError: If AsyncioDebug section is not available in the target process
+ RuntimeError: If there is an error copying memory from the target process
+ OSError: If there is an error accessing the target process
+ PermissionError: If access to the target process is denied
+ UnicodeDecodeError: If there is an error decoding strings from the target process
+
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self)
+/*[clinic end generated code: output=6433d52b55e87bbe input=11b7150c59d4c60f]*/
+{
+ if (!self->async_debug_offsets_available) {
+ PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
+ return NULL;
+ }
+
+ PyObject *result = NULL;
+ PyObject *calls = NULL;
+
+ if (setup_async_result_structure(&result, &calls) < 0) {
+ goto cleanup;
+ }
+
+ uintptr_t running_task_addr, running_coro_addr, running_task_code_obj;
+ if (find_running_task_and_coro(self, &running_task_addr,
+ &running_coro_addr, &running_task_code_obj) < 0) {
+ goto cleanup;
+ }
+
+ if (parse_async_frame_chain(self, calls, running_task_code_obj) < 0) {
+ goto cleanup;
+ }
+
+ if (add_task_info_to_result(self, result, running_task_addr) < 0) {
+ goto cleanup;
+ }
+
+ _Py_RemoteDebug_ClearCache(&self->handle);
+ return result;
+
+cleanup:
+ _Py_RemoteDebug_ClearCache(&self->handle);
+ Py_XDECREF(result);
+ return NULL;
+}
+
+static PyMethodDef RemoteUnwinder_methods[] = {
+ _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF
+ _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF
+ _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF
+ {NULL, NULL}
};
-static struct PyModuleDef module = {
- .m_base = PyModuleDef_HEAD_INIT,
- .m_name = "_remote_debugging",
- .m_size = -1,
- .m_methods = methods,
+static void
+RemoteUnwinder_dealloc(RemoteUnwinderObject *self)
+{
+ PyTypeObject *tp = Py_TYPE(self);
+ if (self->code_object_cache) {
+ _Py_hashtable_destroy(self->code_object_cache);
+ }
+#ifdef Py_GIL_DISABLED
+ if (self->tlbc_cache) {
+ _Py_hashtable_destroy(self->tlbc_cache);
+ }
+#endif
+ if (self->handle.pid != 0) {
+ _Py_RemoteDebug_ClearCache(&self->handle);
+ _Py_RemoteDebug_CleanupProcHandle(&self->handle);
+ }
+ PyObject_Del(self);
+ Py_DECREF(tp);
+}
+
+static PyType_Slot RemoteUnwinder_slots[] = {
+ {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."},
+ {Py_tp_methods, RemoteUnwinder_methods},
+ {Py_tp_init, _remote_debugging_RemoteUnwinder___init__},
+ {Py_tp_dealloc, RemoteUnwinder_dealloc},
+ {0, NULL}
};
-PyMODINIT_FUNC
-PyInit__remote_debugging(void)
+static PyType_Spec RemoteUnwinder_spec = {
+ .name = "_remote_debugging.RemoteUnwinder",
+ .basicsize = sizeof(RemoteUnwinderObject),
+ .flags = Py_TPFLAGS_DEFAULT,
+ .slots = RemoteUnwinder_slots,
+};
+
+/* ============================================================================
+ * MODULE INITIALIZATION
+ * ============================================================================ */
+
+static int
+_remote_debugging_exec(PyObject *m)
{
- PyObject* mod = PyModule_Create(&module);
- if (mod == NULL) {
- return NULL;
+ RemoteDebuggingState *st = RemoteDebugging_GetState(m);
+#define CREATE_TYPE(mod, type, spec) \
+ do { \
+ type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \
+ if (type == NULL) { \
+ return -1; \
+ } \
+ } while (0)
+
+ CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec);
+
+ if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) {
+ return -1;
}
#ifdef Py_GIL_DISABLED
- PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED);
+ PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif
- int rc = PyModule_AddIntConstant(
- mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV);
+ int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV);
if (rc < 0) {
- Py_DECREF(mod);
- return NULL;
+ return -1;
}
- return mod;
+ if (RemoteDebugging_InitState(st) < 0) {
+ return -1;
+ }
+ return 0;
}
+
+static int
+remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg)
+{
+ RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
+ Py_VISIT(state->RemoteDebugging_Type);
+ return 0;
+}
+
+static int
+remote_debugging_clear(PyObject *mod)
+{
+ RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
+ Py_CLEAR(state->RemoteDebugging_Type);
+ return 0;
+}
+
+static void
+remote_debugging_free(void *mod)
+{
+ (void)remote_debugging_clear((PyObject *)mod);
+}
+
+static PyModuleDef_Slot remote_debugging_slots[] = {
+ {Py_mod_exec, _remote_debugging_exec},
+ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+ {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+ {0, NULL},
+};
+
+static PyMethodDef remote_debugging_methods[] = {
+ {NULL, NULL, 0, NULL},
+};
+
+static struct PyModuleDef remote_debugging_module = {
+ PyModuleDef_HEAD_INIT,
+ .m_name = "_remote_debugging",
+ .m_size = sizeof(RemoteDebuggingState),
+ .m_methods = remote_debugging_methods,
+ .m_slots = remote_debugging_slots,
+ .m_traverse = remote_debugging_traverse,
+ .m_clear = remote_debugging_clear,
+ .m_free = remote_debugging_free,
+};
+
+PyMODINIT_FUNC
+PyInit__remote_debugging(void)
+{
+ return PyModuleDef_Init(&remote_debugging_module);
+}
+