]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-138122: Don't sample partial frame chains (#141912)
authorPablo Galindo Salgado <Pablogsal@gmail.com>
Sun, 7 Dec 2025 15:53:48 +0000 (15:53 +0000)
committerGitHub <noreply@github.com>
Sun, 7 Dec 2025 15:53:48 +0000 (15:53 +0000)
13 files changed:
Include/cpython/pystate.h
Include/internal/pycore_debug_offsets.h
Include/internal/pycore_tstate.h
InternalDocs/frames.md
Lib/test/test_external_inspection.py
Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst [new file with mode: 0644]
Modules/_remote_debugging/_remote_debugging.h
Modules/_remote_debugging/frames.c
Modules/_remote_debugging/threads.c
Python/ceval.c
Python/pylifecycle.c
Python/pystate.c
Python/traceback.c

index 08d71070ddccd335115282ce6fdcdd42ad1c0e49..22df26bd37a5c567927b27a716a8836bc94e4a4d 100644 (file)
@@ -135,6 +135,13 @@ struct _ts {
     /* Pointer to currently executing frame. */
     struct _PyInterpreterFrame *current_frame;
 
+    /* Pointer to the base frame (bottommost sentinel frame).
+       Used by profilers to validate complete stack unwinding.
+       Points to the embedded base_frame in _PyThreadStateImpl.
+       The frame is embedded there rather than here because _PyInterpreterFrame
+       is defined in internal headers that cannot be exposed in the public API. */
+    struct _PyInterpreterFrame *base_frame;
+
     struct _PyInterpreterFrame *last_profiled_frame;
 
     Py_tracefunc c_profilefunc;
index bfd86c08887b08f655e465a5efe3f3188ef5699e..1cdc4449b173e8703f3edeed1d85c941803a2c6c 100644 (file)
@@ -102,6 +102,7 @@ typedef struct _Py_DebugOffsets {
         uint64_t next;
         uint64_t interp;
         uint64_t current_frame;
+        uint64_t base_frame;
         uint64_t last_profiled_frame;
         uint64_t thread_id;
         uint64_t native_thread_id;
@@ -273,6 +274,7 @@ typedef struct _Py_DebugOffsets {
         .next = offsetof(PyThreadState, next), \
         .interp = offsetof(PyThreadState, interp), \
         .current_frame = offsetof(PyThreadState, current_frame), \
+        .base_frame = offsetof(PyThreadState, base_frame), \
         .last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
         .thread_id = offsetof(PyThreadState, thread_id), \
         .native_thread_id = offsetof(PyThreadState, native_thread_id), \
index 50048801b2e4eec940042dda751ac03cc6a02e7e..c4f723ac8abbbeabc9eb2a932e407ce28fad11b4 100644 (file)
@@ -10,6 +10,7 @@ extern "C" {
 
 #include "pycore_brc.h"             // struct _brc_thread_state
 #include "pycore_freelist_state.h"  // struct _Py_freelists
+#include "pycore_interpframe_structs.h"  // _PyInterpreterFrame
 #include "pycore_mimalloc.h"        // struct _mimalloc_thread_state
 #include "pycore_qsbr.h"            // struct qsbr
 #include "pycore_uop.h"             // struct _PyUOpInstruction
@@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl {
     // semi-public fields are in PyThreadState.
     PyThreadState base;
 
+    // Embedded base frame - sentinel at the bottom of the frame stack.
+    // Used by profiling/sampling to detect incomplete stack traces.
+    _PyInterpreterFrame base_frame;
+
     // The reference count field is used to synchronize deallocation of the
     // thread state during runtime finalization.
     Py_ssize_t refcount;
index d56e5481d3cbfe1f95cac008aad80f1cc958f94d..60ab2055afa7b1cd92811bf8caef5a8c53605e3b 100644 (file)
@@ -111,6 +111,35 @@ The shim frame points to a special code object containing the `INTERPRETER_EXIT`
 instruction which cleans up the shim frame and returns.
 
 
+### Base frame
+
+Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame"
+that serves as a sentinel at the bottom of the frame stack. This frame is allocated
+in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized
+when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_INTERPRETER`.
+
+External profilers and sampling tools can validate that they have successfully unwound
+the complete call stack by checking that the frame chain terminates at the base frame.
+The `PyThreadState.base_frame` pointer provides the expected address to compare against.
+If a stack walk doesn't reach this frame, the sample is incomplete (possibly due to a
+race condition) and should be discarded.
+
+The base frame is embedded in `_PyThreadStateImpl` rather than `PyThreadState` because
+`_PyInterpreterFrame` is defined in internal headers that cannot be exposed in the
+public API. A pointer (`PyThreadState.base_frame`) is provided for profilers to access
+the address without needing internal headers.
+
+See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/pystate.c).
+
+#### How profilers should use the base frame
+
+External profilers should read `tstate->base_frame` before walking the stack, then
+walk from `tstate->current_frame` following `frame->previous` pointers until reaching
+a frame with `owner == FRAME_OWNED_BY_INTERPRETER`. After the walk, verify that the
+last frame address matches `base_frame`. If not, discard the sample as incomplete
+since the frame chain may have been in an inconsistent state due to concurrent updates.
+
+
 ### Remote Profiling Frame Cache
 
 The `last_profiled_frame` field in `PyThreadState` supports an optimization for
index f664e8ac53ff9551e0c68b6e7230377b6b505f29..a97242483a89421bd39623a9a4c8ca90a34e21c1 100644 (file)
@@ -1,7 +1,7 @@
-import contextlib
 import unittest
 import os
 import textwrap
+import contextlib
 import importlib
 import sys
 import socket
@@ -216,33 +216,13 @@ def requires_subinterpreters(meth):
 # Simple wrapper functions for RemoteUnwinder
 # ============================================================================
 
-# Errors that can occur transiently when reading process memory without synchronization
-RETRIABLE_ERRORS = (
-    "Task list appears corrupted",
-    "Invalid linked list structure reading remote memory",
-    "Unknown error reading memory",
-    "Unhandled frame owner",
-    "Failed to parse initial frame",
-    "Failed to process frame chain",
-    "Failed to unwind stack",
-)
-
-
-def _is_retriable_error(exc):
-    """Check if an exception is a transient error that should be retried."""
-    msg = str(exc)
-    return any(msg.startswith(err) or err in msg for err in RETRIABLE_ERRORS)
-
-
 def get_stack_trace(pid):
     for _ in busy_retry(SHORT_TIMEOUT):
         try:
             unwinder = RemoteUnwinder(pid, all_threads=True, debug=True)
             return unwinder.get_stack_trace()
         except RuntimeError as e:
-            if _is_retriable_error(e):
-                continue
-            raise
+            continue
     raise RuntimeError("Failed to get stack trace after retries")
 
 
@@ -252,9 +232,7 @@ def get_async_stack_trace(pid):
             unwinder = RemoteUnwinder(pid, debug=True)
             return unwinder.get_async_stack_trace()
         except RuntimeError as e:
-            if _is_retriable_error(e):
-                continue
-            raise
+            continue
     raise RuntimeError("Failed to get async stack trace after retries")
 
 
@@ -264,9 +242,7 @@ def get_all_awaited_by(pid):
             unwinder = RemoteUnwinder(pid, debug=True)
             return unwinder.get_all_awaited_by()
         except RuntimeError as e:
-            if _is_retriable_error(e):
-                continue
-            raise
+            continue
     raise RuntimeError("Failed to get all awaited_by after retries")
 
 
@@ -2268,18 +2244,13 @@ sock.connect(('localhost', {port}))
     def _get_frames_with_retry(self, unwinder, required_funcs):
         """Get frames containing required_funcs, with retry for transient errors."""
         for _ in range(MAX_TRIES):
-            try:
+            with contextlib.suppress(OSError, RuntimeError):
                 traces = unwinder.get_stack_trace()
                 for interp in traces:
                     for thread in interp.threads:
                         funcs = {f.funcname for f in thread.frame_info}
                         if required_funcs.issubset(funcs):
                             return thread.frame_info
-            except RuntimeError as e:
-                if _is_retriable_error(e):
-                    pass
-                else:
-                    raise
             time.sleep(0.1)
         return None
 
@@ -2802,70 +2773,39 @@ sock.connect(('localhost', {port}))
             make_unwinder,
         ):
             unwinder = make_unwinder(cache_frames=True)
-            buffer = b""
-
-            def recv_msg():
-                """Receive a single message from socket."""
-                nonlocal buffer
-                while b"\n" not in buffer:
-                    chunk = client_socket.recv(256)
-                    if not chunk:
-                        return None
-                    buffer += chunk
-                msg, buffer = buffer.split(b"\n", 1)
-                return msg
-
-            def get_thread_frames(target_funcs):
-                """Get frames for thread matching target functions."""
-                retries = 0
-                for _ in busy_retry(SHORT_TIMEOUT):
-                    if retries >= 5:
-                        break
-                    retries += 1
-                    # On Windows, ReadProcessMemory can fail with OSError
-                    # (WinError 299) when frame pointers are in flux
-                    with contextlib.suppress(RuntimeError, OSError):
-                        traces = unwinder.get_stack_trace()
-                        for interp in traces:
-                            for thread in interp.threads:
-                                funcs = [f.funcname for f in thread.frame_info]
-                                if any(f in funcs for f in target_funcs):
-                                    return funcs
-                return None
+
+            # Message dispatch table: signal -> required functions for that thread
+            dispatch = {
+                b"t1:baz1": {"baz1", "bar1", "foo1"},
+                b"t2:baz2": {"baz2", "bar2", "foo2"},
+                b"t1:blech1": {"blech1", "foo1"},
+                b"t2:blech2": {"blech2", "foo2"},
+            }
 
             # Track results for each sync point
             results = {}
 
-            # Process 4 sync points: baz1, baz2, blech1, blech2
-            # With the lock, threads are serialized - handle one at a time
-            for _ in range(4):
-                msg = recv_msg()
-                self.assertIsNotNone(msg, "Expected message from subprocess")
-
-                # Determine which thread/function and take snapshot
-                if msg == b"t1:baz1":
-                    funcs = get_thread_frames(["baz1", "bar1", "foo1"])
-                    self.assertIsNotNone(funcs, "Thread 1 not found at baz1")
-                    results["t1:baz1"] = funcs
-                elif msg == b"t2:baz2":
-                    funcs = get_thread_frames(["baz2", "bar2", "foo2"])
-                    self.assertIsNotNone(funcs, "Thread 2 not found at baz2")
-                    results["t2:baz2"] = funcs
-                elif msg == b"t1:blech1":
-                    funcs = get_thread_frames(["blech1", "foo1"])
-                    self.assertIsNotNone(funcs, "Thread 1 not found at blech1")
-                    results["t1:blech1"] = funcs
-                elif msg == b"t2:blech2":
-                    funcs = get_thread_frames(["blech2", "foo2"])
-                    self.assertIsNotNone(funcs, "Thread 2 not found at blech2")
-                    results["t2:blech2"] = funcs
-
-                # Release thread to continue
+            # Process 4 sync points (order depends on thread scheduling)
+            buffer = _wait_for_signal(client_socket, b"\n")
+            for i in range(4):
+                # Extract first message from buffer
+                msg, sep, buffer = buffer.partition(b"\n")
+                self.assertIn(msg, dispatch, f"Unexpected message: {msg!r}")
+
+                # Sample frames for the thread at this sync point
+                required_funcs = dispatch[msg]
+                frames = self._get_frames_with_retry(unwinder, required_funcs)
+                self.assertIsNotNone(frames, f"Thread not found for {msg!r}")
+                results[msg] = [f.funcname for f in frames]
+
+                # Release thread and wait for next message (if not last)
                 client_socket.sendall(b"k")
+                if i < 3:
+                    buffer += _wait_for_signal(client_socket, b"\n")
 
             # Validate Phase 1: baz snapshots
-            t1_baz = results.get("t1:baz1")
-            t2_baz = results.get("t2:baz2")
+            t1_baz = results.get(b"t1:baz1")
+            t2_baz = results.get(b"t2:baz2")
             self.assertIsNotNone(t1_baz, "Missing t1:baz1 snapshot")
             self.assertIsNotNone(t2_baz, "Missing t2:baz2 snapshot")
 
@@ -2890,8 +2830,8 @@ sock.connect(('localhost', {port}))
             self.assertNotIn("foo1", t2_baz)
 
             # Validate Phase 2: blech snapshots (cache invalidation test)
-            t1_blech = results.get("t1:blech1")
-            t2_blech = results.get("t2:blech2")
+            t1_blech = results.get(b"t1:blech1")
+            t2_blech = results.get(b"t2:blech2")
             self.assertIsNotNone(t1_blech, "Missing t1:blech1 snapshot")
             self.assertIsNotNone(t2_blech, "Missing t2:blech2 snapshot")
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst
new file mode 100644 (file)
index 0000000..a4a29e4
--- /dev/null
@@ -0,0 +1,6 @@
+Add incomplete sample detection to prevent corrupted profiling data. Each
+thread state now contains an embedded base frame (sentinel at the bottom of
+the frame stack) with owner type ``FRAME_OWNED_BY_INTERPRETER``. The profiler
+validates that stack unwinding terminates at this sentinel frame. Samples that
+fail to reach the base frame (due to race conditions, memory corruption, or
+other errors) are now rejected rather than being included as spurious data.
index 804e2c904e147a8d6ed03557e5d453a573741c6a..7f3c0d363f56c6a8a638a377a90dbeacb98663c9 100644 (file)
@@ -401,6 +401,7 @@ extern int process_frame_chain(
     uintptr_t initial_frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     int *stopped_at_cached_frame,
index b77c0ca556d5b3c4f0268ae139d8b1e7ab59bc38..eaf3287c6fec1215be458186778eeb0283128cda 100644 (file)
@@ -154,14 +154,13 @@ is_frame_valid(
 
     void* frame = (void*)frame_addr;
 
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
-        return 0;  // C frame
+    char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner);
+    if (owner == FRAME_OWNED_BY_INTERPRETER) {
+        return 0;  // C frame or sentinel base frame
     }
 
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR
-        && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) {
-        PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n",
-                    GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner));
+    if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) {
+        PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
         set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
         return -1;
     }
@@ -260,6 +259,7 @@ process_frame_chain(
     uintptr_t initial_frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     int *stopped_at_cached_frame,
@@ -269,6 +269,7 @@ process_frame_chain(
 {
     uintptr_t frame_addr = initial_frame_addr;
     uintptr_t prev_frame_addr = 0;
+    uintptr_t last_frame_addr = 0;  // Track last frame visited for validation
     const size_t MAX_FRAMES = 1024 + 512;
     size_t frame_count = 0;
 
@@ -296,6 +297,7 @@ process_frame_chain(
         PyObject *frame = NULL;
         uintptr_t next_frame_addr = 0;
         uintptr_t stackpointer = 0;
+        last_frame_addr = frame_addr;  // Remember this frame address
 
         if (++frame_count > MAX_FRAMES) {
             PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
@@ -303,7 +305,6 @@ process_frame_chain(
             return -1;
         }
 
-        // Try chunks first, fallback to direct memory read
         if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
             PyErr_Clear();
             uintptr_t address_of_code_object = 0;
@@ -377,6 +378,17 @@ process_frame_chain(
         frame_addr = next_frame_addr;
     }
 
+    // Validate we reached the base frame (sentinel at bottom of stack)
+    // Only validate if we walked the full chain (didn't stop at cached frame)
+    // and base_frame_addr is provided (non-zero)
+    int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame;
+    if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) {
+        PyErr_Format(PyExc_RuntimeError,
+            "Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
+            base_frame_addr, last_frame_addr);
+        return -1;
+    }
+
     return 0;
 }
 
@@ -540,7 +552,7 @@ collect_frames_with_cache(
     Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
 
     int stopped_at_cached = 0;
-    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, gc_frame,
+    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
                             last_profiled_frame, &stopped_at_cached,
                             addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
         return -1;
@@ -562,7 +574,7 @@ collect_frames_with_cache(
             // Cache miss - continue walking from last_profiled_frame to get the rest
             STATS_INC(unwinder, frame_cache_misses);
             Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
-            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, gc_frame,
+            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
                                     0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
                 return -1;
             }
index ce013f902d1ed7965db1d8a61e2c4117331941ae..69819eb8dcd6453d90e1f39e4ee424a2f5d32c68 100644 (file)
@@ -380,6 +380,7 @@ unwind_stack_for_thread(
     }
 
     uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame);
+    uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame);
 
     frame_info = PyList_New(0);
     if (!frame_info) {
@@ -411,9 +412,9 @@ unwind_stack_for_thread(
             PyErr_Clear();  // Non-fatal
         }
     } else {
-        // No caching - process entire frame chain
+        // No caching - process entire frame chain with base_frame validation
         if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
-                                gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
+                                base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
             goto error;
         }
index 382ae210ebbf2bf0d86aca91810d803f310c5eef..a1d54bd058bc49f60c1ddc97b0b17c218fae11b5 100644 (file)
@@ -3352,6 +3352,9 @@ PyEval_MergeCompilerFlags(PyCompilerFlags *cf)
 {
     PyThreadState *tstate = _PyThreadState_GET();
     _PyInterpreterFrame *current_frame = tstate->current_frame;
+    if (current_frame == tstate->base_frame) {
+        current_frame = NULL;
+    }
     int result = cf->cf_flags != 0;
 
     if (current_frame != NULL) {
index 67368b5ce077aab23d4629b50a64c70ceea7f640..2527dca71d774e8f3b8e82e2e5ccb63a39a7e75d 100644 (file)
@@ -2571,7 +2571,7 @@ Py_EndInterpreter(PyThreadState *tstate)
     if (tstate != _PyThreadState_GET()) {
         Py_FatalError("thread is not current");
     }
-    if (tstate->current_frame != NULL) {
+    if (tstate->current_frame != tstate->base_frame) {
         Py_FatalError("thread still has a frame");
     }
     interp->finalizing = 1;
index c12a1418e74309992cf55ac8bc2ec9b94bbfc53a..2956e785405a0e3c90d7d100b2c1e3f1983e0d5f 100644 (file)
@@ -1482,7 +1482,31 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     // This is cleared when PyGILState_Ensure() creates the thread state.
     tstate->gilstate_counter = 1;
 
-    tstate->current_frame = NULL;
+    // Initialize the embedded base frame - sentinel at the bottom of the frame stack
+    _tstate->base_frame.previous = NULL;
+    _tstate->base_frame.f_executable = PyStackRef_None;
+    _tstate->base_frame.f_funcobj = PyStackRef_NULL;
+    _tstate->base_frame.f_globals = NULL;
+    _tstate->base_frame.f_builtins = NULL;
+    _tstate->base_frame.f_locals = NULL;
+    _tstate->base_frame.frame_obj = NULL;
+    _tstate->base_frame.instr_ptr = NULL;
+    _tstate->base_frame.stackpointer = _tstate->base_frame.localsplus;
+    _tstate->base_frame.return_offset = 0;
+    _tstate->base_frame.owner = FRAME_OWNED_BY_INTERPRETER;
+    _tstate->base_frame.visited = 0;
+#ifdef Py_DEBUG
+    _tstate->base_frame.lltrace = 0;
+#endif
+#ifdef Py_GIL_DISABLED
+    _tstate->base_frame.tlbc_index = 0;
+#endif
+    _tstate->base_frame.localsplus[0] = PyStackRef_NULL;
+
+    // current_frame starts pointing to the base frame
+    tstate->current_frame = &_tstate->base_frame;
+    // base_frame pointer for profilers to validate stack unwinding
+    tstate->base_frame = &_tstate->base_frame;
     tstate->datastack_chunk = NULL;
     tstate->datastack_top = NULL;
     tstate->datastack_limit = NULL;
@@ -1660,7 +1684,7 @@ PyThreadState_Clear(PyThreadState *tstate)
 
     int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose;
 
-    if (verbose && tstate->current_frame != NULL) {
+    if (verbose && tstate->current_frame != tstate->base_frame) {
         /* bpo-20526: After the main thread calls
            _PyInterpreterState_SetFinalizing() in Py_FinalizeEx()
            (or in Py_EndInterpreter() for subinterpreters),
index 48f9b4d04c6b1a7718bccf0ad4f4d1a9c0009893..8af63c22a9f84ec978e1d5d3c73248347d635f7a 100644 (file)
@@ -1036,7 +1036,7 @@ static int
 dump_frame(int fd, _PyInterpreterFrame *frame)
 {
     if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
-        /* Ignore trampoline frame */
+        /* Ignore trampoline frames and base frame sentinel */
         return 0;
     }