- Introduce a new field in the GC state to store the frame that initiated garbage collection.
- Update RemoteUnwinder with options to include "<native>" and "<GC>" frames in the stack trace.
- Modify the sampling profiler to accept parameters for controlling the inclusion of native and GC frames.
- Enhance the stack collector to properly format and append these frames during profiling.
- Add tests to verify the correct behavior of the profiler with respect to native and GC frames, including options to exclude them.
Co-authored-by: Pablo Galindo Salgado <pablogsal@gmail.com>
Sample all threads in the process instead of just the main thread
+.. option:: --native
+
+ Include artificial ``<native>`` frames to denote calls to non-Python code.
+
+.. option:: --no-gc
+
+ Don't include artificial ``<GC>`` frames to denote active garbage collection.
+
.. option:: --realtime-stats
Print real-time sampling statistics during profiling
For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information
about statistical profiling, see :ref:`statistical-profiling`
-.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False)
+.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=False, gc=True)
Sample a Python process and generate profiling data.
:param bool show_summary: Whether to show summary statistics (default: True)
:param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats')
:param bool realtime_stats: Whether to display real-time statistics (default: False)
+ :param bool native: Whether to include ``<native>`` frames (default: False)
+ :param bool gc: Whether to include ``<GC>`` frames (default: True)
:raises ValueError: If output_format is not 'pstats' or 'collapsed'
struct _gc {
uint64_t size;
uint64_t collecting;
+ uint64_t frame;
} gc;
// Generator object offset;
.gc = { \
.size = sizeof(struct _gc_runtime_state), \
.collecting = offsetof(struct _gc_runtime_state, collecting), \
+ .frame = offsetof(struct _gc_runtime_state, frame), \
}, \
.gen_object = { \
.size = sizeof(PyGenObject), \
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(gc));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(native));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fullerror));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(gc));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(native));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name));
STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(format, ".format")
+ STRUCT_FOR_STR(gc, "<GC>")
STRUCT_FOR_STR(generic_base, ".generic_base")
STRUCT_FOR_STR(json_decoder, "json.decoder")
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
STRUCT_FOR_STR(list_err, "list index out of range")
+ STRUCT_FOR_STR(native, "<native>")
STRUCT_FOR_STR(str_replace_inf, "1e309")
STRUCT_FOR_STR(type_params, ".type_params")
STRUCT_FOR_STR(utf_8, "utf-8")
STRUCT_FOR_ID(fullerror)
STRUCT_FOR_ID(func)
STRUCT_FOR_ID(future)
+ STRUCT_FOR_ID(gc)
STRUCT_FOR_ID(generation)
STRUCT_FOR_ID(get)
STRUCT_FOR_ID(get_debug)
STRUCT_FOR_ID(name_from)
STRUCT_FOR_ID(namespace_separator)
STRUCT_FOR_ID(namespaces)
+ STRUCT_FOR_ID(native)
STRUCT_FOR_ID(ndigits)
STRUCT_FOR_ID(nested)
STRUCT_FOR_ID(new_file_name)
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
/* true if we are currently running the collector */
int collecting;
+ // The frame that started the current collection. It might be NULL even when
+ // collecting (if no Python frame is running):
+ _PyInterpreterFrame *frame;
/* list of uncollectable objects */
PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */
FRAME_OWNED_BY_GENERATOR = 1,
FRAME_OWNED_BY_FRAME_OBJECT = 2,
FRAME_OWNED_BY_INTERPRETER = 3,
- FRAME_OWNED_BY_CSTACK = 4,
};
struct _PyInterpreterFrame {
INIT_STR(dot_locals, ".<locals>"), \
INIT_STR(empty, ""), \
INIT_STR(format, ".format"), \
+ INIT_STR(gc, "<GC>"), \
INIT_STR(generic_base, ".generic_base"), \
INIT_STR(json_decoder, "json.decoder"), \
INIT_STR(kwdefaults, ".kwdefaults"), \
INIT_STR(list_err, "list index out of range"), \
+ INIT_STR(native, "<native>"), \
INIT_STR(str_replace_inf, "1e309"), \
INIT_STR(type_params, ".type_params"), \
INIT_STR(utf_8, "utf-8"), \
INIT_ID(fullerror), \
INIT_ID(func), \
INIT_ID(future), \
+ INIT_ID(gc), \
INIT_ID(generation), \
INIT_ID(get), \
INIT_ID(get_debug), \
INIT_ID(name_from), \
INIT_ID(namespace_separator), \
INIT_ID(namespaces), \
+ INIT_ID(native), \
INIT_ID(ndigits), \
INIT_ID(nested), \
INIT_ID(new_file_name), \
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(gc);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(generation);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(native);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(ndigits);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_STR(gc);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(anon_null);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_STR(native);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(anon_setcomp);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
const funcname = resolveString(d.data.funcname) || resolveString(d.data.name);
const filename = resolveString(d.data.filename) || "";
+ // Don't show file location for special frames like <GC> and <native>
+ const isSpecialFrame = filename === "~";
+ const fileLocationHTML = isSpecialFrame ? "" : `
+ <div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px;
+ font-family: monospace; background: #f8f9fa;
+ padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
+ ${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
+ </div>`;
+
const tooltipHTML = `
<div>
<div style="color: #3776ab; font-weight: 600; font-size: 16px;
margin-bottom: 8px; line-height: 1.3; word-break: break-word; overflow-wrap: break-word;">
${funcname}
</div>
- <div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px;
- font-family: monospace; background: #f8f9fa;
- padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
- ${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
- </div>
+ ${fileLocationHTML}
<div style="display: grid; grid-template-columns: auto 1fr;
gap: 8px 16px; font-size: 14px;">
<span style="color: #5a6c7d; font-weight: 500;">Execution Time:</span>
if (i < hotSpots.length && hotSpots[i]) {
const hotspot = hotSpots[i];
const filename = hotspot.filename || 'unknown';
- const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
const lineno = hotspot.lineno ?? '?';
let funcDisplay = hotspot.funcname || 'unknown';
if (funcDisplay.length > 35) {
funcDisplay = funcDisplay.substring(0, 32) + '...';
}
- document.getElementById(`hotspot-file-${num}`).textContent = `${basename}:${lineno}`;
+ // Don't show file:line for special frames like <GC> and <native>
+ const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?');
+ let fileDisplay;
+ if (isSpecialFrame) {
+ fileDisplay = '--';
+ } else {
+ const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
+ fileDisplay = `${basename}:${lineno}`;
+ }
+
+ document.getElementById(`hotspot-file-${num}`).textContent = fileDisplay;
document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay;
document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`;
} else {
class SampleProfiler:
- def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
+ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
self.pid = pid
self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads
if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder(
- self.pid, all_threads=self.all_threads, mode=mode,
+ self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
else:
only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder(
- self.pid, only_active_thread=only_active_threads, mode=mode,
+ self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
# Track sample intervals and total sample count
output_format="pstats",
realtime_stats=False,
mode=PROFILING_MODE_WALL,
+ native=False,
+ gc=True,
):
# PROFILING_MODE_ALL implies no skipping at all
if mode == PROFILING_MODE_ALL:
skip_idle = mode != PROFILING_MODE_WALL
profiler = SampleProfiler(
- pid, sample_interval_usec, all_threads=all_threads, mode=mode,
+ pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
profiler.realtime_stats = realtime_stats
output_format=args.format,
realtime_stats=args.realtime_stats,
mode=mode,
+ native=args.native,
+ gc=args.gc,
)
sampling_group.add_argument(
"--realtime-stats",
action="store_true",
- default=False,
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
)
+ sampling_group.add_argument(
+ "--native",
+ action="store_true",
+ help="Include artificial \"<native>\" frames to denote calls to non-Python code.",
+ )
+ sampling_group.add_argument(
+ "--no-gc",
+ action="store_false",
+ dest="gc",
+ help="Don't include artificial \"<GC>\" frames to denote active garbage collection.",
+ )
# Mode options
mode_group = parser.add_argument_group("Mode options")
output_format=args.format,
realtime_stats=args.realtime_stats,
mode=mode,
+ native=args.native,
+ gc=args.gc,
)
elif args.module or args.args:
if args.module:
def export(self, filename):
lines = []
for (call_tree, thread_id), count in self.stack_counter.items():
- stack_str = ";".join(
- f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree
- )
- lines.append((f"tid:{thread_id};{stack_str}", count))
+ parts = [f"tid:{thread_id}"]
+ for file, line, func in call_tree:
+ # This is what pstats does for "special" frames:
+ if file == "~" and line == 0:
+ part = func
+ else:
+ part = f"{os.path.basename(file)}:{func}:{line}"
+ parts.append(part)
+ stack_str = ";".join(parts)
+ lines.append((stack_str, count))
lines.sort(key=lambda x: (-x[1], x[0]))
def _format_function_name(func):
filename, lineno, funcname = func
+ # Special frames like <GC> and <native> should not show file:line
+ if filename == "~" and lineno == 0:
+ return funcname
+
if len(filename) > 50:
parts = filename.split("/")
if len(parts) > 2:
FrameInfo([script_name, 12, "baz"]),
FrameInfo([script_name, 9, "bar"]),
FrameInfo([threading.__file__, ANY, "Thread.run"]),
+ FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]),
+ FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]),
]
# Is possible that there are more threads, so we check that the
# expected stack traces are in the result (looking at you Windows!)
# Should see some of our test functions
self.assertIn("slow_fibonacci", output)
-
def test_sample_target_module(self):
tempdir = tempfile.TemporaryDirectory(delete=False)
self.addCleanup(lambda x: shutil.rmtree(x), tempdir.name)
show_summary=True,
output_format="pstats",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
show_summary=True,
output_format="pstats",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
show_summary=True,
output_format="pstats",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
show_summary=True,
output_format="pstats",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
show_summary=True,
output_format="collapsed",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
def test_cli_empty_module_name(self):
show_summary=True,
output_format="pstats",
realtime_stats=False,
- mode=0
+ mode=0,
+ native=False,
+ gc=True,
)
def test_sort_options(self):
@requires_subprocess()
@skip_if_not_supported
+class TestGCFrameTracking(unittest.TestCase):
+ """Tests for GC frame tracking in the sampling profiler.
+
+ Both tests profile the same long-running script and differ only in the
+ ``gc`` argument passed to ``sample()``; they then look for the literal
+ ``<GC>`` frame name in the captured output.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Create a static test script with GC frames and CPU-intensive work."""
+ # Each iteration of the script's loop builds an object that references
+ # itself and then calls gc.collect(). The __del__ method does a long
+ # arithmetic loop, presumably so that samples land while a collection
+ # is still in progress.
+ cls.gc_test_script = '''
+import gc
+
+class ExpensiveGarbage:
+ """Class that triggers GC with expensive finalizer (callback)."""
+ def __init__(self):
+ self.cycle = self
+
+ def __del__(self):
+ # CPU-intensive work in the finalizer callback
+ result = 0
+ for i in range(100000):
+ result += i * i
+ if i % 1000 == 0:
+ result = result % 1000000
+
+def main_loop():
+ """Main loop that triggers GC with expensive callback."""
+ while True:
+ ExpensiveGarbage()
+ gc.collect()
+
+if __name__ == "__main__":
+ main_loop()
+'''
+
+ def test_gc_frames_enabled(self):
+ """Test that GC frames appear when gc tracking is enabled."""
+ # sys.stdout is patched so that whatever sample() prints can be
+ # inspected through captured_output below.
+ with (
+ test_subprocess(self.gc_test_script) as subproc,
+ io.StringIO() as captured_output,
+ mock.patch("sys.stdout", captured_output),
+ ):
+ try:
+ profiling.sampling.sample.sample(
+ subproc.process.pid,
+ duration_sec=1,
+ sample_interval_usec=5000,
+ show_summary=False,
+ native=False,
+ gc=True,
+ )
+ except PermissionError:
+ # Attaching to another process may be refused; skip rather than fail.
+ self.skipTest("Insufficient permissions for remote profiling")
+
+ output = captured_output.getvalue()
+
+ # Should capture samples
+ self.assertIn("Captured", output)
+ self.assertIn("samples", output)
+
+ # GC frames should be present
+ self.assertIn("<GC>", output)
+
+ def test_gc_frames_disabled(self):
+ """Test that GC frames do not appear when gc tracking is disabled."""
+ # Same setup as test_gc_frames_enabled, but with gc=False.
+ with (
+ test_subprocess(self.gc_test_script) as subproc,
+ io.StringIO() as captured_output,
+ mock.patch("sys.stdout", captured_output),
+ ):
+ try:
+ profiling.sampling.sample.sample(
+ subproc.process.pid,
+ duration_sec=1,
+ sample_interval_usec=5000,
+ show_summary=False,
+ native=False,
+ gc=False,
+ )
+ except PermissionError:
+ # Attaching to another process may be refused; skip rather than fail.
+ self.skipTest("Insufficient permissions for remote profiling")
+
+ output = captured_output.getvalue()
+
+ # Should capture samples
+ self.assertIn("Captured", output)
+ self.assertIn("samples", output)
+
+ # GC frames should NOT be present
+ self.assertNotIn("<GC>", output)
+
+
+@requires_subprocess()
+@skip_if_not_supported
+class TestNativeFrameTracking(unittest.TestCase):
+ """Tests for native frame tracking in the sampling profiler.
+
+ The enabled case inspects a collapsed-format export file; the disabled
+ case inspects the text printed to stdout.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Create a static test script with native frames and CPU-intensive work."""
+ # Per the script's own comments, main_loop() reaches inner() through
+ # operator.call(), which puts non-Python code between the two Python
+ # frames, while inner() itself spends its time in a pure-Python loop.
+ cls.native_test_script = '''
+import operator
+
+def main_loop():
+ while True:
+ # Native code in the middle of the stack:
+ operator.call(inner)
+
+def inner():
+ # Python code at the top of the stack:
+ for _ in range(1_000_0000):
+ pass
+
+if __name__ == "__main__":
+ main_loop()
+'''
+
+ def test_native_frames_enabled(self):
+ """Test that native frames appear when native tracking is enabled."""
+ # delete=False keeps the file on disk after the handle is closed; the
+ # registered cleanup is responsible for removing it.
+ collapsed_file = tempfile.NamedTemporaryFile(
+ suffix=".txt", delete=False
+ )
+ self.addCleanup(close_and_unlink, collapsed_file)
+
+ with (
+ test_subprocess(self.native_test_script) as subproc,
+ ):
+ # Suppress profiler output when testing file export
+ with (
+ io.StringIO() as captured_output,
+ mock.patch("sys.stdout", captured_output),
+ ):
+ try:
+ profiling.sampling.sample.sample(
+ subproc.process.pid,
+ duration_sec=1,
+ filename=collapsed_file.name,
+ output_format="collapsed",
+ sample_interval_usec=1000,
+ native=True,
+ )
+ except PermissionError:
+ # Attaching to another process may be refused; skip rather than fail.
+ self.skipTest("Insufficient permissions for remote profiling")
+
+ # Verify file was created and contains valid data
+ self.assertTrue(os.path.exists(collapsed_file.name))
+ self.assertGreater(os.path.getsize(collapsed_file.name), 0)
+
+ # Check file format
+ with open(collapsed_file.name, "r") as f:
+ content = f.read()
+
+ lines = content.strip().split("\n")
+ self.assertGreater(len(lines), 0)
+
+ # Drop everything after the last space on each line (presumably the
+ # sample count -- TODO confirm against the collapsed writer), keeping
+ # only the ";"-separated stack.
+ stacks = [line.rsplit(" ", 1)[0] for line in lines]
+
+ # At least one sample should have native code in the middle of the stack:
+ self.assertTrue(any(";<native>;" in stack for stack in stacks))
+
+ # No samples should have native code at the top of the stack:
+ self.assertFalse(any(stack.endswith(";<native>") for stack in stacks))
+
+ def test_native_frames_disabled(self):
+ """Test that native frames do not appear when native tracking is disabled."""
+ # "native" is deliberately not passed, so sample() uses its default.
+ with (
+ test_subprocess(self.native_test_script) as subproc,
+ io.StringIO() as captured_output,
+ mock.patch("sys.stdout", captured_output),
+ ):
+ try:
+ profiling.sampling.sample.sample(
+ subproc.process.pid,
+ duration_sec=1,
+ sample_interval_usec=5000,
+ show_summary=False,
+ )
+ except PermissionError:
+ # Attaching to another process may be refused; skip rather than fail.
+ self.skipTest("Insufficient permissions for remote profiling")
+ output = captured_output.getvalue()
+ # Native frames should NOT be present:
+ self.assertNotIn("<native>", output)
+
+
class TestProcessPoolExecutorSupport(unittest.TestCase):
"""
Test that ProcessPoolExecutor works correctly with profiling.sampling.
self.assertIn("Results: [2, 4, 6]", stdout)
self.assertNotIn("Can't pickle", stderr)
-
-
if __name__ == "__main__":
unittest.main()
--- /dev/null
+Add support for ``<GC>`` and ``<native>`` frames to
+:mod:`!profiling.sampling` output to denote active garbage collection and
+calls to native code.
#include "Python.h"
#include <internal/pycore_debug_offsets.h> // _Py_DebugOffsets
#include <internal/pycore_frame.h> // FRAME_SUSPENDED_YIELD_FROM
-#include <internal/pycore_interpframe.h> // FRAME_OWNED_BY_CSTACK
+#include <internal/pycore_interpframe.h> // FRAME_OWNED_BY_INTERPRETER
#include <internal/pycore_llist.h> // struct llist_node
+#include <internal/pycore_long.h> // _PyLong_GetZero
#include <internal/pycore_stackref.h> // Py_TAG_BITS
#include "../Python/remote_debug.h"
#endif
+// INTERP_STATE_MIN_SIZE is the largest "offset + field size" among the
+// PyInterpreterState fields listed below, i.e. the number of leading bytes
+// needed to cover all of them. gc.frame is listed because the unwinder reads
+// that field out of its interpreter-state buffer when GC frames are enabled.
#ifdef Py_GIL_DISABLED
-#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
- offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
- offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
- offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
+#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+ offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
+ offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
+ offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \
+ offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *))
#else
-#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
- offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
- offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
+#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
+ offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
+ offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \
+ offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *))
#endif
#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
int only_active_thread;
int mode; // Use enum _ProfilingMode values
int skip_non_matching_threads; // New option to skip threads that don't match mode
+ int native;
+ int gc;
RemoteDebuggingState *cached_state; // Cached module state
#ifdef Py_GIL_DISABLED
// TLBC cache invalidation tracking
* CODE OBJECT AND FRAME PARSING FUNCTIONS
* ============================================================================ */
+// Build a FrameInfo struct sequence holding (file, line, func).
+//
+// A new reference to each of the three objects is taken before it is stored,
+// so the caller keeps ownership of the references it passed in. If the
+// struct sequence cannot be created, the failure is recorded through
+// set_exception_cause() and NULL is returned.
+static PyObject *
+make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
+                PyObject *func)
+{
+    RemoteDebuggingState *state =
+        RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
+    PyObject *frame_info = PyStructSequence_New(state->FrameInfo_Type);
+    if (!frame_info) {
+        set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo");
+        return NULL;
+    }
+    // Field order is (file, line, func).
+    PyStructSequence_SetItem(frame_info, 0, Py_NewRef(file));
+    PyStructSequence_SetItem(frame_info, 1, Py_NewRef(line));
+    PyStructSequence_SetItem(frame_info, 2, Py_NewRef(func));
+    return frame_info;
+}
+
static int
parse_code_object(RemoteUnwinderObject *unwinder,
PyObject **result,
PyObject *func = NULL;
PyObject *file = NULL;
PyObject *linetable = NULL;
- PyObject *lineno = NULL;
- PyObject *tuple = NULL;
#ifdef Py_GIL_DISABLED
// In free threading builds, code object addresses might have the low bit set
info.lineno = -1;
}
- lineno = PyLong_FromLong(info.lineno);
+ PyObject *lineno = PyLong_FromLong(info.lineno);
if (!lineno) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object");
goto error;
}
- RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
- tuple = PyStructSequence_New(state->FrameInfo_Type);
+ PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name);
+ Py_DECREF(lineno);
if (!tuple) {
- set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object");
goto error;
}
- Py_INCREF(meta->func_name);
- Py_INCREF(meta->file_name);
- PyStructSequence_SetItem(tuple, 0, meta->file_name);
- PyStructSequence_SetItem(tuple, 1, lineno);
- PyStructSequence_SetItem(tuple, 2, meta->func_name);
-
*result = tuple;
return 0;
Py_XDECREF(func);
Py_XDECREF(file);
Py_XDECREF(linetable);
- Py_XDECREF(lineno);
- Py_XDECREF(tuple);
return -1;
}
PyObject **result,
uintptr_t address,
uintptr_t *previous_frame,
+ uintptr_t *stackpointer,
StackChunkList *chunks
) {
void *frame_ptr = find_frame_in_chunks(chunks, address);
char *frame = (char *)frame_ptr;
*previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
+ *stackpointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.stackpointer);
uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable);
int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
if (frame_valid != 1) {
void* frame = (void*)frame_addr;
- if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK ||
- GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
+ if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
return 0; // C frame
}
RemoteUnwinderObject *unwinder,
uintptr_t initial_frame_addr,
StackChunkList *chunks,
- PyObject *frame_info
-) {
+ PyObject *frame_info,
+ uintptr_t gc_frame)
+{
uintptr_t frame_addr = initial_frame_addr;
uintptr_t prev_frame_addr = 0;
const size_t MAX_FRAMES = 1024;
while ((void*)frame_addr != NULL) {
PyObject *frame = NULL;
uintptr_t next_frame_addr = 0;
+ uintptr_t stackpointer = 0;
if (++frame_count > MAX_FRAMES) {
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
}
// Try chunks first, fallback to direct memory read
- if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) {
+ if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
PyErr_Clear();
uintptr_t address_of_code_object = 0;
if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) {
return -1;
}
}
-
- if (!frame) {
- break;
- }
-
- if (prev_frame_addr && frame_addr != prev_frame_addr) {
- PyErr_Format(PyExc_RuntimeError,
- "Broken frame chain: expected frame at 0x%lx, got 0x%lx",
- prev_frame_addr, frame_addr);
- Py_DECREF(frame);
- set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed");
+ if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) {
+ // If the first frame is missing, the chain is broken:
+ const char *e = "Failed to parse initial frame in chain";
+ PyErr_SetString(PyExc_RuntimeError, e);
return -1;
}
+ PyObject *extra_frame = NULL;
+ // This frame kicked off the current GC collection:
+ if (unwinder->gc && frame_addr == gc_frame) {
+ _Py_DECLARE_STR(gc, "<GC>");
+ extra_frame = &_Py_STR(gc);
+ }
+ // Otherwise, check for native frames to insert:
+ else if (unwinder->native &&
+ // We've reached an interpreter trampoline frame:
+ frame == NULL &&
+ // Bottommost frame is always native, so skip that one:
+ next_frame_addr &&
+ // Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame:
+ !(unwinder->gc && next_frame_addr == gc_frame))
+ {
+ _Py_DECLARE_STR(native, "<native>");
+ extra_frame = &_Py_STR(native);
+ }
+ if (extra_frame) {
+ // Use "~" as file and 0 as line, since that's what pstats uses:
+ PyObject *extra_frame_info = make_frame_info(
+ unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame);
+ if (extra_frame_info == NULL) {
+ return -1;
+ }
+ int error = PyList_Append(frame_info, extra_frame_info);
+ Py_DECREF(extra_frame_info);
+ if (error) {
+ const char *e = "Failed to append extra frame to frame info list";
+ set_exception_cause(unwinder, PyExc_RuntimeError, e);
+ return -1;
+ }
+ }
+ if (frame) {
+ if (prev_frame_addr && frame_addr != prev_frame_addr) {
+ const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx";
+ PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr);
+ Py_DECREF(frame);
+ const char *e = "Frame chain consistency check failed";
+ set_exception_cause(unwinder, PyExc_RuntimeError, e);
+ return -1;
+ }
- if (PyList_Append(frame_info, frame) == -1) {
+ if (PyList_Append(frame_info, frame) == -1) {
+ Py_DECREF(frame);
+ const char *e = "Failed to append frame to frame info list";
+ set_exception_cause(unwinder, PyExc_RuntimeError, e);
+ return -1;
+ }
Py_DECREF(frame);
- set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list");
- return -1;
}
- Py_DECREF(frame);
prev_frame_addr = next_frame_addr;
frame_addr = next_frame_addr;
unwind_stack_for_thread(
RemoteUnwinderObject *unwinder,
uintptr_t *current_tstate,
- uintptr_t gil_holder_tstate
+ uintptr_t gil_holder_tstate,
+ uintptr_t gc_frame
) {
PyObject *frame_info = NULL;
PyObject *thread_id = NULL;
goto error;
}
- if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) {
+ if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
goto error;
}
mode: int = 0
debug: bool = False
skip_non_matching_threads: bool = True
+ native: bool = False
+ gc: bool = False
Initialize a new RemoteUnwinder object for debugging a remote Python process.
lead to the exception.
skip_non_matching_threads: If True, skip threads that don't match the selected mode.
If False, include all threads regardless of mode.
+ native: If True, include artificial "<native>" frames to denote calls to
+ non-Python code.
+ gc: If True, include artificial "<GC>" frames to denote active garbage
+ collection.
The RemoteUnwinder provides functionality to inspect and debug a running Python
process, including examining thread states, stack frames and other runtime data.
int pid, int all_threads,
int only_active_thread,
int mode, int debug,
- int skip_non_matching_threads)
-/*[clinic end generated code: output=abf5ea5cd58bcb36 input=08fb6ace023ec3b5]*/
+ int skip_non_matching_threads,
+ int native, int gc)
+/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/
{
// Validate that all_threads and only_active_thread are not both True
if (all_threads && only_active_thread) {
}
#endif
+ self->native = native;
+ self->gc = gc;
self->debug = debug;
self->only_active_thread = only_active_thread;
self->mode = mode;
goto exit;
}
+ uintptr_t gc_frame = 0;
+ if (self->gc) {
+ gc_frame = GET_MEMBER(uintptr_t, interp_state_buffer,
+ self->debug_offsets.interpreter_state.gc
+ + self->debug_offsets.gc.frame);
+ }
+
int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer,
self->debug_offsets.interpreter_state.id);
}
while (current_tstate != 0) {
- PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate, gil_holder_tstate);
+ PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate,
+ gil_holder_tstate,
+ gc_frame);
if (!frame_info) {
// Check if this was an intentional skip due to mode-based filtering
if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) {
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
-" mode=0, debug=False, skip_non_matching_threads=True)\n"
+" mode=0, debug=False, skip_non_matching_threads=True,\n"
+" native=False, gc=False)\n"
"--\n"
"\n"
"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
" lead to the exception.\n"
" skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n"
" If False, include all threads regardless of mode.\n"
+" native: If True, include artificial \"<native>\" frames to denote calls to\n"
+" non-Python code.\n"
+" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n"
+" collection.\n"
"\n"
"The RemoteUnwinder provides functionality to inspect and debug a running Python\n"
"process, including examining thread states, stack frames and other runtime data.\n"
int pid, int all_threads,
int only_active_thread,
int mode, int debug,
- int skip_non_matching_threads);
+ int skip_non_matching_threads,
+ int native, int gc);
static int
_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
- #define NUM_KEYWORDS 6
+ #define NUM_KEYWORDS 8
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
- .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), },
+ .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
- static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", NULL};
+ static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "RemoteUnwinder",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
- PyObject *argsbuf[6];
+ PyObject *argsbuf[8];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
int mode = 0;
int debug = 0;
int skip_non_matching_threads = 1;
+ int native = 0;
+ int gc = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
goto skip_optional_kwonly;
}
}
- skip_non_matching_threads = PyObject_IsTrue(fastargs[5]);
- if (skip_non_matching_threads < 0) {
+ if (fastargs[5]) {
+ skip_non_matching_threads = PyObject_IsTrue(fastargs[5]);
+ if (skip_non_matching_threads < 0) {
+ goto exit;
+ }
+ if (!--noptargs) {
+ goto skip_optional_kwonly;
+ }
+ }
+ if (fastargs[6]) {
+ native = PyObject_IsTrue(fastargs[6]);
+ if (native < 0) {
+ goto exit;
+ }
+ if (!--noptargs) {
+ goto skip_optional_kwonly;
+ }
+ }
+ gc = PyObject_IsTrue(fastargs[7]);
+ if (gc < 0) {
goto exit;
}
skip_optional_kwonly:
- return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads);
+ return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc);
exit:
return return_value;
return return_value;
}
-/*[clinic end generated code: output=2caefeddf7683d32 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/
// Don't start a garbage collection if one is already in progress.
return 0;
}
+ gcstate->frame = tstate->current_frame;
struct gc_collection_stats stats = { 0 };
if (reason != _Py_GC_REASON_SHUTDOWN) {
}
#endif
validate_spaces(gcstate);
+ gcstate->frame = NULL;
_Py_atomic_store_int(&gcstate->collecting, 0);
if (gcstate->debug & _PyGC_DEBUG_STATS) {
_Py_atomic_store_int(&gcstate->collecting, 0);
return 0;
}
+ gcstate->frame = tstate->current_frame;
assert(generation >= 0 && generation < NUM_GENERATIONS);
}
assert(!_PyErr_Occurred(tstate));
+ gcstate->frame = NULL;
_Py_atomic_store_int(&gcstate->collecting, 0);
return n + m;
}