See :ref:`profiling-permissions` for platform-specific requirements.
+.. _replay-command:
+
+The ``replay`` command
+----------------------
+
+The ``replay`` command converts binary profile files to other output formats::
+
+ python -m profiling.sampling replay profile.bin
+ python -m profiling.sampling replay --flamegraph -o profile.html profile.bin
+
+This command is useful when you have captured profiling data in binary format
+and want to analyze it later or convert it to a visualization format. Binary
+profiles can be replayed multiple times to different formats without
+re-profiling.
+
+::
+
+ # Convert binary to pstats (default, prints to stdout)
+ python -m profiling.sampling replay profile.bin
+
+ # Convert binary to flame graph
+ python -m profiling.sampling replay --flamegraph -o output.html profile.bin
+
+ # Convert binary to gecko format for Firefox Profiler
+ python -m profiling.sampling replay --gecko -o profile.json profile.bin
+
+ # Convert binary to heatmap
+ python -m profiling.sampling replay --heatmap -o my_heatmap profile.bin
+
+
Profiling in production
-----------------------
interpretation of hierarchical visualizations.
+Binary format
+-------------
+
+Binary format (:option:`--binary`) produces a compact binary file for efficient
+storage of profiling data::
+
+ python -m profiling.sampling run --binary -o profile.bin script.py
+ python -m profiling.sampling attach --binary -o profile.bin 12345
+
+The :option:`--compression` option controls data compression:
+
+- ``auto`` (default): Use zstd compression if available, otherwise no
+ compression
+- ``zstd``: Force zstd compression (requires :mod:`compression.zstd` support)
+- ``none``: Disable compression
+
+::
+
+ python -m profiling.sampling run --binary --compression=zstd -o profile.bin script.py
+
+To analyze binary profiles, use the :ref:`replay-command` to convert them to
+other formats like flame graphs or pstats output.
+
+
+Record and replay workflow
+==========================
+
+The binary format combined with the replay command enables a record-and-replay
+workflow that separates data capture from analysis. Rather than generating
+visualizations during profiling, you capture raw data to a compact binary file
+and convert it to different formats later.
+
+This approach has three main benefits:
+
+- Sampling runs faster because the work of building data structures for
+ visualization is deferred until replay.
+- A single binary capture can be converted to multiple output formats
+ without re-profiling: pstats for a quick overview, flame graph for visual
+ exploration, heatmap for line-level detail.
+- Binary files are compact and easy to share with colleagues who can convert
+ them to their preferred format.
+
+A typical workflow::
+
+ # Capture profile in production or during tests
+ python -m profiling.sampling attach --binary -o profile.bin 12345
+
+ # Later, analyze with different formats
+ python -m profiling.sampling replay profile.bin
+ python -m profiling.sampling replay --flamegraph -o profile.html profile.bin
+ python -m profiling.sampling replay --heatmap -o heatmap profile.bin
+
+
Live mode
=========
Attach to and profile a running process by PID.
+.. option:: replay
+
+ Convert a binary profile file to another output format.
+
Sampling options
----------------
Generate HTML heatmap with line-level sample counts.
+.. option:: --binary
+
+ Generate high-performance binary format for later conversion with the
+ ``replay`` command.
+
+.. option:: --compression <type>
+
+ Compression for binary format: ``auto`` (use zstd if available, default),
+ ``zstd``, or ``none``.
+
.. option:: -o <path>, --output <path>
Output file or directory path. Default behavior varies by format:
- ``--pstats`` writes to stdout, ``--flamegraph`` and ``--gecko`` generate
- files like ``flamegraph.PID.html``, and ``--heatmap`` creates a directory
- named ``heatmap_PID``.
+ :option:`--pstats` writes to stdout, while other formats generate a file
+ named ``<format>_<PID>.<ext>`` (for example, ``flamegraph_12345.html``).
+ :option:`--heatmap` creates a directory named ``heatmap_<PID>``.
pstats display options
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_varnames));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(code));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(col_offset));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(collector));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(command));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(comment_factory));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile_mode));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compression));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(config));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_tb));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_val));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_value));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(excepthook));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(print_file_and_line));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_callback));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_routine));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(proto));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(protocol));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reversed));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(rounding));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(salt));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sample_interval_us));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sched_priority));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(scheduler));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(script));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(spam));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src_dir_fd));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stack_frames));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stacklevel));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(start));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(start_time_us));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(statement));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stats));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(status));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(times));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timespec));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timestamp));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timestamp_us));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timetuple));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeunit));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(top));
STRUCT_FOR_ID(co_varnames)
STRUCT_FOR_ID(code)
STRUCT_FOR_ID(col_offset)
+ STRUCT_FOR_ID(collector)
STRUCT_FOR_ID(command)
STRUCT_FOR_ID(comment_factory)
STRUCT_FOR_ID(compile_mode)
+ STRUCT_FOR_ID(compression)
STRUCT_FOR_ID(config)
STRUCT_FOR_ID(consts)
STRUCT_FOR_ID(context)
STRUCT_FOR_ID(event)
STRUCT_FOR_ID(eventmask)
STRUCT_FOR_ID(exc)
+ STRUCT_FOR_ID(exc_tb)
STRUCT_FOR_ID(exc_type)
+ STRUCT_FOR_ID(exc_val)
STRUCT_FOR_ID(exc_value)
STRUCT_FOR_ID(excepthook)
STRUCT_FOR_ID(exception)
STRUCT_FOR_ID(print_file_and_line)
STRUCT_FOR_ID(priority)
STRUCT_FOR_ID(progress)
+ STRUCT_FOR_ID(progress_callback)
STRUCT_FOR_ID(progress_routine)
STRUCT_FOR_ID(proto)
STRUCT_FOR_ID(protocol)
STRUCT_FOR_ID(reversed)
STRUCT_FOR_ID(rounding)
STRUCT_FOR_ID(salt)
+ STRUCT_FOR_ID(sample_interval_us)
STRUCT_FOR_ID(sched_priority)
STRUCT_FOR_ID(scheduler)
STRUCT_FOR_ID(script)
STRUCT_FOR_ID(spam)
STRUCT_FOR_ID(src)
STRUCT_FOR_ID(src_dir_fd)
+ STRUCT_FOR_ID(stack_frames)
STRUCT_FOR_ID(stacklevel)
STRUCT_FOR_ID(start)
+ STRUCT_FOR_ID(start_time_us)
STRUCT_FOR_ID(statement)
STRUCT_FOR_ID(stats)
STRUCT_FOR_ID(status)
STRUCT_FOR_ID(times)
STRUCT_FOR_ID(timespec)
STRUCT_FOR_ID(timestamp)
+ STRUCT_FOR_ID(timestamp_us)
STRUCT_FOR_ID(timetuple)
STRUCT_FOR_ID(timeunit)
STRUCT_FOR_ID(top)
INIT_ID(co_varnames), \
INIT_ID(code), \
INIT_ID(col_offset), \
+ INIT_ID(collector), \
INIT_ID(command), \
INIT_ID(comment_factory), \
INIT_ID(compile_mode), \
+ INIT_ID(compression), \
INIT_ID(config), \
INIT_ID(consts), \
INIT_ID(context), \
INIT_ID(event), \
INIT_ID(eventmask), \
INIT_ID(exc), \
+ INIT_ID(exc_tb), \
INIT_ID(exc_type), \
+ INIT_ID(exc_val), \
INIT_ID(exc_value), \
INIT_ID(excepthook), \
INIT_ID(exception), \
INIT_ID(print_file_and_line), \
INIT_ID(priority), \
INIT_ID(progress), \
+ INIT_ID(progress_callback), \
INIT_ID(progress_routine), \
INIT_ID(proto), \
INIT_ID(protocol), \
INIT_ID(reversed), \
INIT_ID(rounding), \
INIT_ID(salt), \
+ INIT_ID(sample_interval_us), \
INIT_ID(sched_priority), \
INIT_ID(scheduler), \
INIT_ID(script), \
INIT_ID(spam), \
INIT_ID(src), \
INIT_ID(src_dir_fd), \
+ INIT_ID(stack_frames), \
INIT_ID(stacklevel), \
INIT_ID(start), \
+ INIT_ID(start_time_us), \
INIT_ID(statement), \
INIT_ID(stats), \
INIT_ID(status), \
INIT_ID(times), \
INIT_ID(timespec), \
INIT_ID(timestamp), \
+ INIT_ID(timestamp_us), \
INIT_ID(timetuple), \
INIT_ID(timeunit), \
INIT_ID(top), \
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(collector);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(command);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(compression);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(config);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(exc_tb);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(exc_type);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(exc_val);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(exc_value);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(progress_callback);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(progress_routine);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(sample_interval_us);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(sched_priority);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(stack_frames);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(stacklevel);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(start_time_us);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(statement);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(timestamp_us);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(timetuple);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
--- /dev/null
+# Profiling Binary Format
+
+The profiling module includes a binary file format for storing sampling
+profiler data. This document describes the format's structure and the
+design decisions behind it.
+
+The implementation is in
+[`Modules/_remote_debugging/binary_io_writer.c`](../Modules/_remote_debugging/binary_io_writer.c)
+and [`Modules/_remote_debugging/binary_io_reader.c`](../Modules/_remote_debugging/binary_io_reader.c),
+with declarations in
+[`Modules/_remote_debugging/binary_io.h`](../Modules/_remote_debugging/binary_io.h).
+
+## Overview
+
+The sampling profiler can generate enormous amounts of data. A typical
+profiling session sampling at 1000 Hz for 60 seconds produces 60,000 samples.
+Each sample contains a full call stack, often 20-50 frames deep, and each
+frame includes a filename, function name, and line number. In a text-based
+format like collapsed stacks, this would mean repeating the same long file
+paths and function names thousands of times.
+
+The binary format addresses this through two key strategies:
+
+1. **Deduplication**: Strings and frames are stored once in lookup tables,
+ then referenced by small integer indices. A 100-character file path that
+ appears in 50,000 samples is stored once, not 50,000 times.
+
+2. **Compact encoding**: Variable-length integers (varints) encode small
+ values in fewer bytes. Since most indices are small (under 128), they
+ typically need only one byte instead of four.
+
+Together with optional zstd compression, these techniques reduce file sizes
+by 10-50x compared to text formats while also enabling faster I/O.
+
+## File Layout
+
+The file consists of five sections:
+
+```
++------------------+ Offset 0
+| Header | 64 bytes (fixed)
++------------------+ Offset 64
+| |
+| Sample Data | Variable size (optionally compressed)
+| |
++------------------+ string_table_offset
+| String Table | Variable size
++------------------+ frame_table_offset
+| Frame Table | Variable size
++------------------+ file_size - 32
+| Footer | 32 bytes (fixed)
++------------------+ file_size
+```
+
+The layout is designed for streaming writes during profiling. The profiler
+cannot know in advance how many unique strings or frames will be encountered,
+so these tables must be built incrementally and written at the end.
+
+The header comes first so readers can quickly validate the file and locate
+the metadata tables. The sample data follows immediately, allowing the writer
+to stream samples directly to disk (or through a compression stream) without
+buffering the entire dataset in memory.
+
+The string and frame tables are placed after sample data because they grow
+as new unique entries are discovered during profiling. By deferring their
+output until finalization, the writer avoids the complexity of reserving
+space or rewriting portions of the file.
+
+The footer at the end contains counts needed to allocate arrays before
+parsing the tables. Placing it at a fixed offset from the end (rather than
+at a variable offset recorded in the header) means readers can locate it
+with a single seek to `file_size - 32`, without first reading the header.
+
+## Header
+
+```
+ Offset Size Type Description
++--------+------+---------+----------------------------------------+
+| 0 | 4 | uint32 | Magic number (0x54414348 = "TACH") |
+| 4 | 4 | uint32 | Format version |
+| 8 | 4 | bytes | Python version (major, minor, micro, |
+| | | | reserved) |
+| 12 | 8 | uint64 | Start timestamp (microseconds) |
+| 20 | 8 | uint64 | Sample interval (microseconds) |
+| 28 | 4 | uint32 | Total sample count |
+| 32 | 4 | uint32 | Thread count |
+| 36 | 8 | uint64 | String table offset |
+| 44 | 8 | uint64 | Frame table offset |
+| 52 | 4 | uint32 | Compression type (0=none, 1=zstd) |
+| 56 | 8 | bytes | Reserved (zero-filled) |
++--------+------+---------+----------------------------------------+
+```
+
+The magic number `0x54414348` ("TACH" for Tachyon) identifies the file format
+and also serves as an **endianness marker**. When read on a system with
+different byte order than the writer, it appears as `0x48434154`. The reader
+uses this to detect cross-endian files and automatically byte-swap all
+multi-byte integer fields.
+
+The Python version field records the major, minor, and micro version numbers
+of the Python interpreter that generated the file. This allows analysis tools
+to detect version mismatches when replaying data collected on a different
+Python version, which may have different internal structures or behaviors.
+
+The header is written as zeros initially, then overwritten with actual values
+during finalization. This requires the output stream to be seekable, which
+is acceptable since the format targets regular files rather than pipes or
+network streams.
+
+## Sample Data
+
+Sample data begins at offset 64 and extends to `string_table_offset`. Samples
+use delta compression to minimize redundancy when consecutive samples from the
+same thread have identical or similar call stacks.
+
+### Stack Encoding Types
+
+Each sample record begins with thread identification, then an encoding byte:
+
+| Code | Name | Description |
+|------|------|-------------|
+| 0x00 | REPEAT | RLE: identical stack repeated N times |
+| 0x01 | FULL | Complete stack (first sample or no match) |
+| 0x02 | SUFFIX | Shares N frames from bottom of previous stack |
+| 0x03 | POP_PUSH | Remove M frames from top, add N new frames |
+
+### Record Formats
+
+**REPEAT (0x00) - Run-Length Encoded Identical Stacks:**
+```
++-----------------+-----------+----------------------------------------+
+| thread_id | 8 bytes | Thread identifier (uint64, fixed) |
+| interpreter_id | 4 bytes | Interpreter ID (uint32, fixed) |
+| encoding | 1 byte | 0x00 (REPEAT) |
+| count | varint | Number of samples in this RLE group |
+| samples | varies | Interleaved: [delta: varint, status: 1]|
+| | | repeated count times |
++-----------------+-----------+----------------------------------------+
+```
+The stack is inherited from this thread's previous sample. Each sample in the
+group gets its own timestamp delta and status byte, stored as interleaved pairs
+(delta1, status1, delta2, status2, ...) rather than separate arrays.
+
+**FULL (0x01) - Complete Stack:**
+```
++-----------------+-----------+----------------------------------------+
+| thread_id | 8 bytes | Thread identifier (uint64, fixed) |
+| interpreter_id | 4 bytes | Interpreter ID (uint32, fixed) |
+| encoding | 1 byte | 0x01 (FULL) |
+| timestamp_delta | varint | Microseconds since thread's last sample|
+| status | 1 byte | Thread state flags |
+| stack_depth | varint | Number of frames in call stack |
+| frame_indices | varint[] | Array of frame table indices |
++-----------------+-----------+----------------------------------------+
+```
+Used for the first sample from a thread, or when delta encoding would not
+provide savings.
+
+**SUFFIX (0x02) - Shared Suffix Match:**
+```
++-----------------+-----------+----------------------------------------+
+| thread_id | 8 bytes | Thread identifier (uint64, fixed) |
+| interpreter_id | 4 bytes | Interpreter ID (uint32, fixed) |
+| encoding | 1 byte | 0x02 (SUFFIX) |
+| timestamp_delta | varint | Microseconds since thread's last sample|
+| status | 1 byte | Thread state flags |
+| shared_count | varint | Frames shared from bottom of prev stack|
+| new_count | varint | New frames at top of stack |
+| new_frames | varint[] | Array of new_count frame indices |
++-----------------+-----------+----------------------------------------+
+```
+Used when a function call added frames to the top of the stack. The shared
+frames from the previous stack are kept, and new frames are prepended.
+
+**POP_PUSH (0x03) - Pop and Push:**
+```
++-----------------+-----------+----------------------------------------+
+| thread_id | 8 bytes | Thread identifier (uint64, fixed) |
+| interpreter_id | 4 bytes | Interpreter ID (uint32, fixed) |
+| encoding | 1 byte | 0x03 (POP_PUSH) |
+| timestamp_delta | varint | Microseconds since thread's last sample|
+| status | 1 byte | Thread state flags |
+| pop_count | varint | Frames to remove from top of prev stack|
+| push_count | varint | New frames to add at top |
+| new_frames | varint[] | Array of push_count frame indices |
++-----------------+-----------+----------------------------------------+
+```
+Used when the code path changed: some frames were popped (function returns)
+and new frames were pushed (different function calls).
+
+### Thread and Interpreter Identification
+
+Thread IDs are 64-bit values that can be large (memory addresses on some
+platforms) and vary unpredictably. Using a fixed 8-byte encoding avoids
+the overhead of varint encoding for large values and simplifies parsing
+since the reader knows exactly where each field begins.
+
+The interpreter ID identifies which Python sub-interpreter the thread
+belongs to, allowing analysis tools to separate activity across interpreters
+in processes using multiple sub-interpreters.
+
+### Status Byte
+
+The status byte is a bitfield encoding thread state at sample time:
+
+| Bit | Flag | Meaning |
+|-----|-----------------------|--------------------------------------------|
+| 0 | THREAD_STATUS_HAS_GIL | Thread holds the GIL (Global Interpreter Lock) |
+| 1 | THREAD_STATUS_ON_CPU | Thread is actively running on a CPU core |
+| 2 | THREAD_STATUS_UNKNOWN | Thread state could not be determined |
+| 3 | THREAD_STATUS_GIL_REQUESTED | Thread is waiting to acquire the GIL |
+| 4 | THREAD_STATUS_HAS_EXCEPTION | Thread has a pending exception |
+
+Multiple flags can be set simultaneously (e.g., a thread can hold the GIL
+while also running on CPU). Analysis tools use these to filter samples or
+visualize thread states over time.
+
+### Timestamp Delta Encoding
+
+Timestamps use delta encoding rather than absolute values. Absolute
+timestamps in microseconds require 8 bytes each, but consecutive samples
+from the same thread are typically separated by the sampling interval
+(e.g., 1000 microseconds), so the delta between them is small and fits
+in 1-2 varint bytes. The writer tracks the previous timestamp for each
+thread separately. The first sample from a thread encodes its delta from
+the profiling start time; subsequent samples encode the delta from that
+thread's previous sample. This per-thread tracking is necessary because
+samples are interleaved across threads in arrival order, not grouped by
+thread.
+
+For REPEAT (RLE) records, timestamp deltas and status bytes are stored as
+interleaved pairs (delta, status, delta, status, ...) - one pair per
+repeated sample - allowing efficient batching while preserving the exact
+timing and state of each sample.
+
+### Frame Indexing
+
+Each frame in a call stack is represented by an index into the frame table
+rather than inline data. This provides massive space savings because call
+stacks are highly repetitive: the same function appears in many samples
+(hot functions), call stacks often share common prefixes (main -> app ->
+handler -> ...), and recursive functions create repeated frame sequences.
+A frame index is typically 1-2 varint bytes. Inline frame data would be
+20-200+ bytes (two strings plus a line number). For a profile with 100,000
+samples averaging 30 frames each, this reduces frame data from potentially
+gigabytes to tens of megabytes.
+
+Frame indices are written innermost-first (the currently executing frame
+has index 0 in the array). This ordering works well with delta compression:
+function calls typically add frames at the top (index 0), while shared
+frames remain at the bottom.
+
+## String Table
+
+The string table stores deduplicated UTF-8 strings (filenames and function
+names). It begins at `string_table_offset` and contains entries in order of
+their assignment during writing:
+
+```
++----------------+
+| length: varint |
+| data: bytes |
++----------------+ (repeated for each string)
+```
+
+Strings are stored in the order they were first encountered during writing.
+The first unique filename gets index 0, the second gets index 1, and so on.
+Length-prefixing (rather than null-termination) allows strings containing
+null bytes and enables readers to allocate exact-sized buffers. The varint
+length encoding means short strings (under 128 bytes) need only one length
+byte.
+
+## Frame Table
+
+The frame table stores deduplicated frame entries:
+
+```
++----------------------+
+| filename_idx: varint |
+| funcname_idx: varint |
+| lineno: svarint |
++----------------------+ (repeated for each frame)
+```
+
+Each unique (filename, funcname, lineno) combination gets one entry. Two
+calls to the same function at different line numbers produce different
+frame entries; two calls at the same line number share one entry.
+
+Strings and frames are deduplicated separately because they have different
+cardinalities and reference patterns. A codebase might have hundreds of
+unique source files but thousands of unique functions. Many functions share
+the same filename, so storing the filename index in each frame entry (rather
+than the full string) provides an additional layer of deduplication. A frame
+entry is just three varints (typically 3-6 bytes) rather than two full
+strings plus a line number.
+
+Line numbers use signed varint (zigzag encoding) rather than unsigned to
+handle edge cases. Synthetic frames—generated frames that don't correspond
+directly to Python source code, such as C extension boundaries or internal
+interpreter frames—use line number 0 or -1 to indicate the absence of a
+source location. Zigzag encoding ensures these small negative values encode
+efficiently (−1 becomes 1, which is one byte) rather than requiring the
+maximum varint length.
+
+## Footer
+
+```
+ Offset Size Type Description
++--------+------+---------+----------------------------------------+
+| 0 | 4 | uint32 | String count |
+| 4 | 4 | uint32 | Frame count |
+| 8 | 8 | uint64 | Total file size |
+| 16 | 16 | bytes | Checksum (reserved, currently zeros) |
++--------+------+---------+----------------------------------------+
+```
+
+The string and frame counts allow readers to pre-allocate arrays of the
+correct size before parsing the tables. Without these counts, readers would
+need to either scan the tables twice (once to count, once to parse) or use
+dynamically-growing arrays.
+
+The file size field provides a consistency check: if the actual file size
+does not match, the file may be truncated or corrupted.
+
+The checksum field is reserved for future use. A checksum would allow
+detection of corruption but adds complexity and computation cost. The
+current implementation leaves this as zeros.
+
+## Variable-Length Integer Encoding
+
+The format uses LEB128 (Little Endian Base 128) for unsigned integers and
+zigzag + LEB128 for signed integers. These encodings are widely used
+(Protocol Buffers, DWARF debug info, WebAssembly) and well-understood.
+
+### Unsigned Varint (LEB128)
+
+Each byte stores 7 bits of data. The high bit indicates whether more bytes
+follow:
+
+```
+Value Encoded bytes
+0-127 [0xxxxxxx] (1 byte)
+128-16383 [1xxxxxxx] [0xxxxxxx] (2 bytes)
+16384+ [1xxxxxxx] [1xxxxxxx] ... (3+ bytes)
+```
+
+Most indices in profiling data are small. A profile with 1000 unique frames
+needs at most 2 bytes per frame index. The common case (indices under 128)
+needs only 1 byte.
+
+### Signed Varint (Zigzag)
+
+Standard LEB128 encodes −1 as a very large unsigned value, requiring many
+bytes. Zigzag encoding interleaves positive and negative values:
+
+```
+ 0 -> 0 -1 -> 1 1 -> 2 -2 -> 3 2 -> 4
+```
+
+This ensures small-magnitude values (whether positive or negative) encode
+in few bytes.
+
+## Compression
+
+When compression is enabled, the sample data region contains a zstd stream.
+The string table, frame table, and footer remain uncompressed so readers can
+access metadata without decompressing the entire file. A tool that only needs
+to report "this file contains 50,000 samples of 3 threads" can read the header
+and footer without touching the compressed sample data. This also simplifies
+the format: the header's offset fields point directly to the tables rather
+than to positions within a decompressed stream.
+
+Zstd provides an excellent balance of compression ratio and speed. Profiling
+data compresses very well (often 5-10x) due to repetitive patterns: the same
+small set of frame indices appears repeatedly, and delta-encoded timestamps
+cluster around the sampling interval. Zstd's streaming API allows compression
+without buffering the entire dataset. The writer feeds sample data through
+the compressor incrementally, flushing compressed chunks to disk as they
+become available.
+
+Level 5 compression is used as a default. Lower levels (1-3) are faster but
+compress less; higher levels (6+) compress more but slow down writing. Level
+5 provides good compression with minimal impact on profiling overhead.
+
+## Reading and Writing
+
+### Writing
+
+1. Open the output file and write 64 zero bytes as a placeholder header
+2. Initialize empty string and frame dictionaries for deduplication
+3. For each sample:
+ - Intern any new strings, assigning sequential indices
+ - Intern any new frames, assigning sequential indices
+ - Encode the sample record and write to the buffer
+ - Flush the buffer through compression (if enabled) when full
+4. Flush remaining buffered data and finalize compression
+5. Write the string table (length-prefixed strings in index order)
+6. Write the frame table (varint-encoded entries in index order)
+7. Write the footer with final counts
+8. Seek to offset 0 and write the header with actual values
+
+The writer maintains two dictionaries: one mapping strings to indices, one
+mapping (filename_idx, funcname_idx, lineno) tuples to frame indices. These
+enable O(1) lookup during interning.
+
+### Reading
+
+1. Read the header magic number to detect endianness (set `needs_swap` flag
+ if the magic appears byte-swapped)
+2. Validate version and read remaining header fields (byte-swapping if needed)
+3. Seek to end − 32 and read the footer (byte-swapping counts if needed)
+4. Allocate string array of `string_count` elements
+5. Parse the string table, populating the array
+6. Allocate frame array of `frame_count * 3` uint32 elements
+7. Parse the frame table, populating the array
+8. If compressed, decompress the sample data region
+9. Iterate through samples, resolving indices to strings/frames
+ (byte-swapping thread_id and interpreter_id if needed)
+
+The reader builds lookup arrays rather than dictionaries since it only needs
+index-to-value mapping, not value-to-index.
+
+## Platform Considerations
+
+### Byte Ordering and Cross-Platform Portability
+
+The binary format uses **native byte order** for all multi-byte integer
+fields when writing. However, the reader supports **cross-endian reading**:
+files written on a little-endian system (x86, ARM) can be read on a
+big-endian system (s390x, PowerPC), and vice versa.
+
+The magic number doubles as an endianness marker. When read on a system with
+different byte order, it appears byte-swapped (`0x48434154` instead of
+`0x54414348`). The reader detects this and automatically byte-swaps all
+fixed-width integer fields during parsing.
+
+Writers must use `memcpy()` from properly-sized integer types when writing
+fixed-width integer fields. When the source variable's type differs from the
+field width (e.g., `size_t` written as 4 bytes), explicit casting to the
+correct type (e.g., `uint32_t`) is required before `memcpy()`. On big-endian
+systems, copying from an oversized type would copy the wrong bytes—high-order
+zeros instead of the actual value.
+
+The reader tracks whether byte-swapping is needed via a `needs_swap` flag set
+during header parsing. All fixed-width fields in the header, footer, and
+sample data are conditionally byte-swapped using Python's internal byte-swap
+functions (`_Py_bswap32`, `_Py_bswap64` from `pycore_bitutils.h`).
+
+Variable-length integers (varints) are byte-order independent since they
+encode values one byte at a time using the LEB128 scheme, so they require
+no special handling for cross-endian reading.
+
+### Memory-Mapped I/O
+
+On Unix systems (Linux, macOS), the reader uses `mmap()` to map the file
+into the process address space. This has several benefits: the kernel handles
+paging data in and out as needed, no explicit read() calls or buffer
+management are required, multiple readers can share the same physical pages,
+and sequential access patterns benefit from kernel read-ahead.
+
+The implementation uses `madvise()` to hint the access pattern to the kernel:
+`MADV_SEQUENTIAL` indicates the file will be read linearly, enabling
+aggressive read-ahead. `MADV_WILLNEED` requests pre-faulting of pages.
+On Linux, `MAP_POPULATE` pre-faults all pages at mmap time rather than on
+first access, moving page fault overhead from the parsing loop to the
+initial mapping for more predictable performance. For large files (over
+32 MB), `MADV_HUGEPAGE` requests transparent huge pages (2 MB instead of
+4 KB) to reduce TLB pressure when accessing large amounts of data.
+
+On Windows, the implementation falls back to standard file I/O with full
+file buffering. Profiling data files are typically small enough (tens to
+hundreds of megabytes) that this is acceptable.
+
+The writer uses a 512 KB buffer to batch small writes. Each sample record
+is typically tens of bytes; writing these individually would incur excessive
+syscall overhead. The buffer accumulates data until full, then flushes in
+one write() call (or feeds through the compression stream).
+
+## Future Considerations
+
+The format reserves space for future extensions. The 12 reserved bytes in
+the header could hold additional metadata. The 16-byte checksum field in
+the footer is currently unused. The version field allows incompatible
+changes with graceful rejection. New compression types could be added
+(compression_type > 1).
+
+Any changes that alter the meaning of existing fields or the parsing logic
+should increment the version number to prevent older readers from
+misinterpreting new files.
--- /dev/null
+"""Thin Python wrapper around C binary writer for profiling data."""
+
+import time
+
+import _remote_debugging
+
+from .collector import Collector
+
+# Compression type constants (must match binary_io.h)
+COMPRESSION_NONE = 0
+COMPRESSION_ZSTD = 1
+
+
+def _resolve_compression(compression):
+    """Resolve a compression specification to a numeric constant.
+
+    Args:
+        compression: 'auto', 'zstd', 'none' (case-insensitive), or an int
+            (COMPRESSION_NONE / COMPRESSION_ZSTD)
+
+    Returns:
+        int: Compression type constant (COMPRESSION_NONE or COMPRESSION_ZSTD)
+
+    Raises:
+        ValueError: If the string or int is not a recognized compression type.
+    """
+    if isinstance(compression, int):
+        # Validate ints instead of passing them through blindly, so a typo
+        # fails here with a clear message rather than in the C writer.
+        if compression not in (COMPRESSION_NONE, COMPRESSION_ZSTD):
+            raise ValueError(f"Unknown compression type: {compression}")
+        return compression
+
+    compression = compression.lower()
+    if compression == 'none':
+        return COMPRESSION_NONE
+    elif compression == 'zstd':
+        return COMPRESSION_ZSTD
+    elif compression == 'auto':
+        # Auto: use zstd if the C extension was built with support,
+        # otherwise fall back to no compression.
+        if _remote_debugging.zstd_available():
+            return COMPRESSION_ZSTD
+        return COMPRESSION_NONE
+    else:
+        raise ValueError(f"Unknown compression type: {compression}")
+
+
+class BinaryCollector(Collector):
+    """Collector that streams samples to a compact binary file via C code.
+
+    All encoding, deduplication, and buffered I/O happen inside the
+    ``_remote_debugging.BinaryWriter`` C object; this class only adapts the
+    Collector interface to it. The on-disk format deduplicates strings and
+    frames and uses varint encoding for efficient storage, with optional
+    zstd compression.
+    """
+
+    def __init__(self, filename, sample_interval_usec, *, skip_idle=False,
+                 compression='auto'):
+        """Initialize the collector and open the output file for writing.
+
+        Args:
+            filename: Path to output binary file
+            sample_interval_usec: Sampling interval in microseconds
+            skip_idle: If True, skip idle threads (not used in binary format)
+            compression: 'auto', 'zstd', 'none', or int (0=none, 1=zstd)
+        """
+        self.filename = filename
+        self.sample_interval_usec = sample_interval_usec
+        self.skip_idle = skip_idle
+
+        # The C writer opens the file immediately and records the start time
+        # (microseconds, monotonic clock) in the header.
+        ctype = _resolve_compression(compression)
+        start_us = int(time.monotonic() * 1_000_000)
+        self._writer = _remote_debugging.BinaryWriter(
+            filename, sample_interval_usec, start_us, compression=ctype
+        )
+
+    def collect(self, stack_frames, timestamp_us=None):
+        """Forward one sample to the C writer.
+
+        The C side performs all encoding and buffering.
+
+        Args:
+            stack_frames: List of InterpreterInfo objects from _remote_debugging
+            timestamp_us: Timestamp in microseconds; when None, one is
+                generated from time.monotonic().
+        """
+        when = (int(time.monotonic() * 1_000_000)
+                if timestamp_us is None else timestamp_us)
+        self._writer.write_sample(stack_frames, when)
+
+    def collect_failed_sample(self):
+        """Record a failed sample attempt (intentionally a no-op here)."""
+        pass
+
+    def export(self, filename=None):
+        """Finalize and close the binary file.
+
+        Args:
+            filename: Ignored (binary files are written incrementally)
+        """
+        self._writer.finalize()
+
+    @property
+    def total_samples(self):
+        # Sample count as tracked by the C writer.
+        return self._writer.total_samples
+
+    def get_stats(self):
+        """Return encoding statistics from the C writer.
+
+        Returns:
+            Dict with encoding statistics including repeat/full/suffix/pop-push
+            record counts, frames written/saved, and compression ratio.
+        """
+        return self._writer.get_stats()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Finalize on clean exit; on error just close without finalizing."""
+        (self._writer.finalize if exc_type is None else self._writer.close)()
+        return False
--- /dev/null
+"""Thin Python wrapper around C binary reader for profiling data."""
+
+
+class BinaryReader:
+    """High-performance reader for binary profiling files (C-backed).
+
+    Wraps ``_remote_debugging.BinaryReader``, which uses memory-mapped I/O
+    (on Unix) for fast replay of profiling data.
+
+    Use as a context manager:
+        with BinaryReader('profile.bin') as reader:
+            info = reader.get_info()
+            reader.replay_samples(collector, progress_callback)
+    """
+
+    def __init__(self, filename):
+        """Remember the input path; the file is opened by __enter__.
+
+        Args:
+            filename: Path to input binary file
+        """
+        self.filename = filename
+        self._reader = None
+
+    def _require_open(self):
+        # Shared guard: every accessor needs the C reader to be open.
+        if self._reader is None:
+            raise RuntimeError("Reader not open. Use as context manager.")
+        return self._reader
+
+    def __enter__(self):
+        import _remote_debugging
+        self._reader = _remote_debugging.BinaryReader(self.filename)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Detach first so a failure in close() still leaves us marked closed.
+        reader, self._reader = self._reader, None
+        if reader is not None:
+            reader.close()
+        return False
+
+    def get_info(self):
+        """Return metadata about the binary file.
+
+        Returns:
+            dict: File metadata including:
+                - sample_count: Number of samples in the file
+                - sample_interval_us: Sampling interval in microseconds
+                - start_time_us: Start timestamp in microseconds
+                - string_count: Number of unique strings
+                - frame_count: Number of unique frames
+                - compression: Compression type used
+        """
+        return self._require_open().get_info()
+
+    def replay_samples(self, collector, progress_callback=None):
+        """Replay samples from the binary file through a collector.
+
+        This allows converting binary profiling data to other formats
+        (e.g., flamegraph, pstats) by replaying through the appropriate
+        collector.
+
+        Args:
+            collector: A Collector instance with a collect() method
+            progress_callback: Optional callable(current, total) for progress
+
+        Returns:
+            int: Number of samples replayed
+        """
+        return self._require_open().replay(collector, progress_callback)
+
+    @property
+    def sample_count(self):
+        # Convenience accessor for the sample count from the file metadata.
+        return self._require_open().get_info()['sample_count']
+
+    def get_stats(self):
+        """Return reconstruction statistics from replay.
+
+        Returns:
+            dict: Statistics about record types decoded and samples
+                reconstructed during replay.
+        """
+        return self._require_open().get_stats()
+
+
+def convert_binary_to_format(input_file, output_file, output_format,
+                             sample_interval_usec=None, progress_callback=None):
+    """Convert a binary profiling file to another format.
+
+    Args:
+        input_file: Path to input binary file
+        output_file: Path to output file
+        output_format: Target format ('flamegraph', 'collapsed', 'pstats', etc.)
+        sample_interval_usec: Override sample interval (uses file's if None)
+        progress_callback: Optional callable(current, total) for progress
+
+    Returns:
+        int: Number of samples converted
+
+    Raises:
+        ValueError: If output_format is not a supported target format.
+    """
+    from .gecko_collector import GeckoCollector
+    from .stack_collector import FlamegraphCollector, CollapsedStackCollector
+    from .pstats_collector import PStatsCollector
+
+    # Map each supported target format to its collector class.
+    factories = {
+        'flamegraph': FlamegraphCollector,
+        'collapsed': CollapsedStackCollector,
+        'pstats': PStatsCollector,
+        'gecko': GeckoCollector,
+    }
+
+    with BinaryReader(input_file) as reader:
+        info = reader.get_info()
+        interval = sample_interval_usec or info['sample_interval_us']
+
+        collector_cls = factories.get(output_format)
+        if collector_cls is None:
+            raise ValueError(f"Unknown output format: {output_format}")
+        collector = collector_cls(interval)
+
+        # Stream every sample through the collector, then write the result.
+        count = reader.replay_samples(collector, progress_callback)
+        collector.export(output_file)
+
+        return count
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
+from .binary_collector import BinaryCollector
+from .binary_reader import BinaryReader
from .constants import (
PROFILING_MODE_ALL,
PROFILING_MODE_WALL,
"flamegraph": "html",
"gecko": "json",
"heatmap": "html",
+ "binary": "bin",
}
COLLECTOR_MAP = {
"flamegraph": FlamegraphCollector,
"gecko": GeckoCollector,
"heatmap": HeatmapCollector,
+ "binary": BinaryCollector,
}
def _setup_child_monitor(args, parent_pid):
def _check_process_died(process):
"""Check if process died and raise an error with stderr if available."""
if process.poll() is None:
- return # Process still running
+ return
# Process died - try to get stderr for error message
stderr_msg = ""
)
-def _add_format_options(parser):
+def _add_format_options(parser, include_compression=True, include_binary=True):
"""Add output format options to a parser."""
output_group = parser.add_argument_group("Output options")
format_group = output_group.add_mutually_exclusive_group()
dest="format",
help="Generate interactive HTML heatmap visualization with line-level sample counts",
)
+ if include_binary:
+ format_group.add_argument(
+ "--binary",
+ action="store_const",
+ const="binary",
+ dest="format",
+ help="Generate high-performance binary format (use 'replay' command to convert)",
+ )
parser.set_defaults(format="pstats")
+ if include_compression:
+ output_group.add_argument(
+ "--compression",
+ choices=["auto", "zstd", "none"],
+ default="auto",
+ help="Compression for binary format: auto (use zstd if available), zstd, none",
+ )
+
output_group.add_argument(
"-o",
"--output",
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
-def _create_collector(format_type, interval, skip_idle, opcodes=False):
+def _create_collector(format_type, interval, skip_idle, opcodes=False,
+ output_file=None, compression='auto'):
"""Create the appropriate collector based on format type.
Args:
- format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
+ format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary')
interval: Sampling interval in microseconds
skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko format
for creating interval markers in Firefox Profiler)
+ output_file: Output file path (required for binary format)
+ compression: Compression type for binary format ('auto', 'zstd', 'none')
Returns:
A collector instance of the appropriate type
if collector_class is None:
raise ValueError(f"Unknown format: {format_type}")
+ # Binary format requires output file and compression
+ if format_type == "binary":
+ if output_file is None:
+ raise ValueError("Binary format requires an output file")
+ return collector_class(output_file, interval, skip_idle=skip_idle,
+ compression=compression)
+
# Gecko format never skips idle (it needs both GIL and CPU data)
# and is the only format that uses opcodes for interval markers
if format_type == "gecko":
pid: Process ID (for generating filenames)
mode: Profiling mode used
"""
- if args.format == "pstats":
+    if args.format == "binary":
+        # Binary format already wrote to file incrementally, just finalize.
+        collector.export(None)
+        filename = collector.filename
+        # Bug fix: interpolate the actual output path instead of printing a
+        # literal placeholder (filename was assigned but never used).
+        print(f"Binary profile written to {filename} ({collector.total_samples} samples)")
+ elif args.format == "pstats":
if args.outfile:
# If outfile is a directory, generate filename inside it
if os.path.isdir(args.outfile):
args: Parsed command-line arguments
parser: ArgumentParser instance for error reporting
"""
+ # Replay command has no special validation needed
+ if getattr(args, 'command', None) == "replay":
+ return
+
# Check if live mode is available
if hasattr(args, 'live') and args.live and LiveStatsCollector is None:
parser.error(
parser.error("--subprocesses is incompatible with --live mode.")
# Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads
- if args.async_aware:
+ if getattr(args, 'async_aware', False):
issues = []
if args.native:
issues.append("--native")
)
# --async-mode requires --async-aware
- if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware:
+ if hasattr(args, 'async_mode') and args.async_mode != "running" and not getattr(args, 'async_aware', False):
parser.error("--async-mode requires --async-aware to be enabled.")
# Live mode is incompatible with format options
return
# Validate gecko mode doesn't use non-wall mode
- if args.format == "gecko" and args.mode != "wall":
+ if args.format == "gecko" and getattr(args, 'mode', 'wall') != "wall":
parser.error(
"--mode option is incompatible with --gecko. "
"Gecko format automatically includes both GIL-holding and CPU status analysis."
# Validate --opcodes is only used with compatible formats
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
- if args.opcodes and args.format not in opcodes_compatible_formats:
+ if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats:
parser.error(
f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
)
_add_format_options(attach_parser)
_add_pstats_options(attach_parser)
+ # === REPLAY COMMAND ===
+ replay_parser = subparsers.add_parser(
+ "replay",
+ help="Replay a binary profile and convert to another format",
+ formatter_class=CustomFormatter,
+ description="""Replay a binary profile file and convert to another format
+
+Examples:
+ # Convert binary to flamegraph
+ `python -m profiling.sampling replay --flamegraph -o output.html profile.bin`
+
+ # Convert binary to pstats and print to stdout
+ `python -m profiling.sampling replay profile.bin`
+
+ # Convert binary to gecko format
+ `python -m profiling.sampling replay --gecko -o profile.json profile.bin`""",
+ )
+ replay_parser.add_argument(
+ "input_file",
+ help="Binary profile file to replay",
+ )
+ _add_format_options(replay_parser, include_compression=False, include_binary=False)
+ _add_pstats_options(replay_parser)
+
# Parse arguments
args = parser.parse_args()
command_handlers = {
"run": _handle_run,
"attach": _handle_attach,
+ "replay": _handle_replay,
}
# Execute the appropriate command
mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
)
+ output_file = None
+ if args.format == "binary":
+ output_file = args.outfile or _generate_output_filename(args.format, args.pid)
+
# Create the appropriate collector
- collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
+ collector = _create_collector(
+ args.format, args.interval, skip_idle, args.opcodes,
+ output_file=output_file,
+ compression=getattr(args, 'compression', 'auto')
+ )
with _get_child_monitor_context(args, args.pid):
collector = sample(
mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
)
+ output_file = None
+ if args.format == "binary":
+ output_file = args.outfile or _generate_output_filename(args.format, process.pid)
+
# Create the appropriate collector
- collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
+ collector = _create_collector(
+ args.format, args.interval, skip_idle, args.opcodes,
+ output_file=output_file,
+ compression=getattr(args, 'compression', 'auto')
+ )
with _get_child_monitor_context(args, process.pid):
try:
process.wait()
+def _handle_replay(args):
+    """Handle the 'replay' command - convert binary profile to another format.
+
+    Reads a binary profile, replays every sample through the collector for
+    the requested output format, and either prints pstats to stdout or
+    writes the converted output to a file.
+
+    Args:
+        args: Parsed command-line arguments (input_file, format, outfile, ...)
+    """
+    import os
+
+    if not os.path.exists(args.input_file):
+        sys.exit(f"Error: Input file not found: {args.input_file}")
+
+    with BinaryReader(args.input_file) as reader:
+        info = reader.get_info()
+        interval = info['sample_interval_us']
+
+        # BinaryReader.get_info() documents this key as 'compression' while
+        # this code used 'compression_type'; accept either so the banner
+        # reports the real value instead of silently defaulting to 'none'.
+        compression = info.get('compression_type', info.get('compression', 0))
+
+        print(f"Replaying {info['sample_count']} samples from {args.input_file}")
+        print(f" Sample interval: {interval} us")
+        print(f" Compression: {'zstd' if compression == 1 else 'none'}")
+
+        collector = _create_collector(args.format, interval, skip_idle=False)
+
+        def progress_callback(current, total):
+            # Render a simple in-place progress bar on stdout.
+            if total > 0:
+                pct = current / total
+                bar_width = 40
+                filled = int(bar_width * pct)
+                bar = '█' * filled + '░' * (bar_width - filled)
+                print(f"\r [{bar}] {pct*100:5.1f}% ({current:,}/{total:,})", end="", flush=True)
+
+        count = reader.replay_samples(collector, progress_callback)
+        print()  # Terminate the progress-bar line.
+
+        if args.format == "pstats":
+            if args.outfile:
+                collector.export(args.outfile)
+            else:
+                # No output file: print a summary table to stdout.
+                sort_choice = args.sort if args.sort is not None else "nsamples"
+                limit = args.limit if args.limit is not None else 15
+                sort_mode = _sort_to_mode(sort_choice)
+                collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL)
+        else:
+            filename = args.outfile or _generate_output_filename(args.format, os.getpid())
+            collector.export(filename)
+
+        print(f"Replayed {count} samples")
+
if __name__ == "__main__":
main()
class Collector(ABC):
@abstractmethod
- def collect(self, stack_frames):
- """Collect profiling data from stack frames."""
+ def collect(self, stack_frames, timestamps_us=None):
+ """Collect profiling data from stack frames.
+
+ Args:
+ stack_frames: List of InterpreterInfo objects
+ timestamps_us: Optional list of timestamps in microseconds. If provided
+ (from binary replay with RLE batching), use these instead of current
+ time. If None, collectors should use time.monotonic() or similar.
+ The list may contain multiple timestamps when samples are batched
+ together (same stack, different times).
+ """
def collect_failed_sample(self):
"""Collect data about a failed sample attempt."""
# Phase 3: Build linear stacks from each leaf to root (optimized - no sorting!)
yield from self._build_linear_stacks(leaf_task_ids, task_map, child_to_parent)
+ def _iter_stacks(self, stack_frames, skip_idle=False):
+ """Yield (frames, thread_id) for all stacks, handling both sync and async modes."""
+ if stack_frames and hasattr(stack_frames[0], "awaited_by"):
+ for frames, thread_id, _ in self._iter_async_frames(stack_frames):
+ if frames:
+ yield frames, thread_id
+ else:
+ for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle):
+ if frames:
+ yield frames, thread_id
+
def _build_task_graph(self, awaited_info_list):
task_map = {}
child_to_parent = {} # Maps child_id -> (selected_parent_id, parent_count)
self.sample_interval_usec = sample_interval_usec
self.skip_idle = skip_idle
self.opcodes_enabled = opcodes
- self.start_time = time.time() * 1000 # milliseconds since epoch
+ self.start_time = time.monotonic() * 1000 # milliseconds since start
# Global string table (shared across all threads)
self.global_strings = ["(root)"] # Start with root
# Opcode state tracking per thread: tid -> (opcode, lineno, col_offset, funcname, filename, start_time)
self.opcode_state = {}
+ # For binary replay: track base timestamp (first sample's timestamp)
+ self._replay_base_timestamp_us = None
+
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
active_name, inactive_name, category, current_time):
"""Track binary state transitions and emit markers.
self._add_marker(tid, active_name, active_dict.pop(tid),
current_time, category)
- def collect(self, stack_frames):
- """Collect a sample from stack frames."""
- current_time = (time.time() * 1000) - self.start_time
+ def collect(self, stack_frames, timestamps_us=None):
+ """Collect samples from stack frames.
+
+ Args:
+ stack_frames: List of interpreter/thread frame info
+ timestamps_us: List of timestamps in microseconds (None for live sampling)
+ """
+ # Handle live sampling (no timestamps provided)
+ if timestamps_us is None:
+ current_time = (time.monotonic() * 1000) - self.start_time
+ times = [current_time]
+ else:
+ if not timestamps_us:
+ return
+ # Initialize base timestamp if needed
+ if self._replay_base_timestamp_us is None:
+ self._replay_base_timestamp_us = timestamps_us[0]
+ # Convert all timestamps to times (ms relative to first sample)
+ base = self._replay_base_timestamp_us
+ times = [(ts - base) / 1000 for ts in timestamps_us]
+
+ first_time = times[0]
# Update interval calculation
if self.sample_count > 0 and self.last_sample_time > 0:
- self.interval = (
- current_time - self.last_sample_time
- ) / self.sample_count
- self.last_sample_time = current_time
+ self.interval = (times[-1] - self.last_sample_time) / self.sample_count
+ self.last_sample_time = times[-1]
- # Process threads and track GC per thread
+ # Process threads
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
frames = thread_info.frame_info
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
- # Track GIL possession (Has GIL / No GIL)
+ # Track state transitions using first timestamp
self._track_state_transition(
tid, has_gil, self.has_gil_start, self.no_gil_start,
- "Has GIL", "No GIL", CATEGORY_GIL, current_time
+ "Has GIL", "No GIL", CATEGORY_GIL, first_time
)
-
- # Track CPU state (On CPU / Off CPU)
self._track_state_transition(
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
- "On CPU", "Off CPU", CATEGORY_CPU, current_time
+ "On CPU", "Off CPU", CATEGORY_CPU, first_time
)
- # Track code type (Python Code / Native Code)
- # This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
+ # Track code type
if has_gil:
self._track_state_transition(
tid, True, self.python_code_start, self.native_code_start,
- "Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
+ "Python Code", "Native Code", CATEGORY_CODE_TYPE, first_time
)
elif on_cpu:
self._track_state_transition(
tid, True, self.native_code_start, self.python_code_start,
- "Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
+ "Native Code", "Python Code", CATEGORY_CODE_TYPE, first_time
)
else:
- # Thread is idle (neither has GIL nor on CPU) - close any open code markers
- # This handles the third state that _track_state_transition doesn't cover
if tid in self.initialized_threads:
if tid in self.python_code_start:
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
- current_time, CATEGORY_CODE_TYPE)
+ first_time, CATEGORY_CODE_TYPE)
if tid in self.native_code_start:
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
- current_time, CATEGORY_CODE_TYPE)
+ first_time, CATEGORY_CODE_TYPE)
- # Track "Waiting for GIL" intervals (one-sided tracking)
+ # Track GIL wait
if gil_requested:
- self.gil_wait_start.setdefault(tid, current_time)
+ self.gil_wait_start.setdefault(tid, first_time)
elif tid in self.gil_wait_start:
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
- current_time, CATEGORY_GIL)
+ first_time, CATEGORY_GIL)
- # Track exception state (Has Exception / No Exception)
+ # Track exception state
has_exception = bool(status_flags & THREAD_STATUS_HAS_EXCEPTION)
self._track_state_transition(
tid, has_exception, self.exception_start, self.no_exception_start,
- "Has Exception", "No Exception", CATEGORY_EXCEPTION, current_time
+ "Has Exception", "No Exception", CATEGORY_EXCEPTION, first_time
)
- # Track GC events by detecting <GC> frames in the stack trace
- # This leverages the improved GC frame tracking from commit 336366fd7ca
- # which precisely identifies the thread that initiated GC collection
+ # Track GC events
has_gc_frame = any(frame[2] == "<GC>" for frame in frames)
if has_gc_frame:
- # This thread initiated GC collection
if tid not in self.gc_start_per_thread:
- self.gc_start_per_thread[tid] = current_time
+ self.gc_start_per_thread[tid] = first_time
elif tid in self.gc_start_per_thread:
- # End GC marker when no more GC frames are detected
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
- current_time, CATEGORY_GC)
+ first_time, CATEGORY_GC)
- # Mark thread as initialized after processing all state transitions
+ # Mark thread as initialized
self.initialized_threads.add(tid)
- # Categorize: idle if neither has GIL nor on CPU
+ # Skip idle threads if requested
is_idle = not has_gil and not on_cpu
-
- # Skip idle threads if skip_idle is enabled
if self.skip_idle and is_idle:
continue
if not frames:
continue
- # Process the stack
+ # Process stack once to get stack_index
stack_index = self._process_stack(thread_data, frames)
- # Add sample - cache references to avoid dictionary lookups
+ # Add samples with timestamps
samples = thread_data["samples"]
- samples["stack"].append(stack_index)
- samples["time"].append(current_time)
- samples["eventDelay"].append(None)
+ samples_stack = samples["stack"]
+ samples_time = samples["time"]
+ samples_delay = samples["eventDelay"]
+
+ for t in times:
+ samples_stack.append(stack_index)
+ samples_time.append(t)
+ samples_delay.append(None)
- # Track opcode state changes for interval markers (leaf frame only)
- if self.opcodes_enabled:
+ # Handle opcodes
+ if self.opcodes_enabled and frames:
leaf_frame = frames[0]
filename, location, funcname, opcode = leaf_frame
if isinstance(location, tuple):
current_state = (opcode, lineno, col_offset, funcname, filename)
if tid not in self.opcode_state:
- # First observation - start tracking
- self.opcode_state[tid] = (*current_state, current_time)
+ self.opcode_state[tid] = (*current_state, first_time)
elif self.opcode_state[tid][:5] != current_state:
- # State changed - emit marker for previous state
prev_opcode, prev_lineno, prev_col, prev_funcname, prev_filename, prev_start = self.opcode_state[tid]
self._add_opcode_interval_marker(
- tid, prev_opcode, prev_lineno, prev_col, prev_funcname, prev_start, current_time
+ tid, prev_opcode, prev_lineno, prev_col, prev_funcname, prev_start, first_time
)
- # Start tracking new state
- self.opcode_state[tid] = (*current_state, current_time)
+ self.opcode_state[tid] = (*current_state, first_time)
- self.sample_count += 1
+ self.sample_count += len(times)
def _create_thread(self, tid):
"""Create a new thread structure with processed profile format."""
}
self.stats.update(kwargs)
- def process_frames(self, frames, thread_id):
+ def process_frames(self, frames, thread_id, weight=1):
"""Process stack frames and count samples per line.
Args:
leaf-to-root order. location is (lineno, end_lineno, col_offset, end_col_offset).
opcode is None if not gathered.
thread_id: Thread ID for this stack trace
+ weight: Number of samples this stack represents (for batched RLE)
"""
- self._total_samples += 1
+ self._total_samples += weight
self._seen_lines.clear()
for i, (filename, location, funcname, opcode) in enumerate(frames):
self._seen_lines.add(line_key)
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
- count_cumulative=count_cumulative)
+ count_cumulative=count_cumulative, weight=weight)
if opcode is not None:
# Set opcodes_enabled flag when we first encounter opcode data
self.opcodes_enabled = True
self._record_bytecode_sample(filename, lineno, opcode,
- end_lineno, col_offset, end_col_offset)
+ end_lineno, col_offset, end_col_offset,
+ weight=weight)
- # Build call graph for adjacent frames
+ # Build call graph for adjacent frames (relationships are deduplicated anyway)
if i + 1 < len(frames):
next_frame = frames[i + 1]
next_lineno = extract_lineno(next_frame[1])
return True
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
- count_cumulative=True):
+ count_cumulative=True, weight=1):
"""Record a sample for a specific line."""
# Track cumulative samples (all occurrences in stack)
if count_cumulative:
- self.line_samples[(filename, lineno)] += 1
- self.file_samples[filename][lineno] += 1
+ self.line_samples[(filename, lineno)] += weight
+ self.file_samples[filename][lineno] += weight
# Track self/leaf samples (only when at top of stack)
if is_leaf:
- self.line_self_samples[(filename, lineno)] += 1
- self.file_self_samples[filename][lineno] += 1
+ self.line_self_samples[(filename, lineno)] += weight
+ self.file_self_samples[filename][lineno] += weight
# Record function definition location
if funcname and (filename, funcname) not in self.function_definitions:
self.function_definitions[(filename, funcname)] = lineno
def _record_bytecode_sample(self, filename, lineno, opcode,
- end_lineno=None, col_offset=None, end_col_offset=None):
+ end_lineno=None, col_offset=None, end_col_offset=None,
+ weight=1):
"""Record a sample for a specific bytecode instruction.
Args:
end_lineno: End line number (may be -1 if not available)
col_offset: Column offset in UTF-8 bytes (may be -1 if not available)
end_col_offset: End column offset in UTF-8 bytes (may be -1 if not available)
+ weight: Number of samples this represents (for batched RLE)
"""
key = (filename, lineno)
if opcode not in self.line_opcodes[key]:
self.line_opcodes[key][opcode] = {'count': 0, 'locations': set()}
- self.line_opcodes[key][opcode]['count'] += 1
+ self.line_opcodes[key][opcode]['count'] += weight
# Store unique location info if column offset is available (not -1)
if col_offset is not None and col_offset >= 0:
self.failed_samples += 1
self.total_samples += 1
- def collect(self, stack_frames):
+ def collect(self, stack_frames, timestamp_us=None):
"""Collect and display profiling data."""
if self.start_time is None:
self.start_time = time.perf_counter()
self.skip_idle = skip_idle
self._seen_locations = set()
- def _process_frames(self, frames):
+ def _process_frames(self, frames, weight=1):
"""Process a single thread's frame stack."""
if not frames:
return
location = (frame.filename, lineno, frame.funcname)
if location not in self._seen_locations:
self._seen_locations.add(location)
- self.result[location]["cumulative_calls"] += 1
+ self.result[location]["cumulative_calls"] += weight
# The top frame gets counted as an inline call (directly executing)
top_lineno = extract_lineno(frames[0].location)
top_location = (frames[0].filename, top_lineno, frames[0].funcname)
- self.result[top_location]["direct_calls"] += 1
+ self.result[top_location]["direct_calls"] += weight
# Track caller-callee relationships for call graph
for i in range(1, len(frames)):
callee = (callee_frame.filename, callee_lineno, callee_frame.funcname)
caller = (caller_frame.filename, caller_lineno, caller_frame.funcname)
- self.callers[callee][caller] += 1
+ self.callers[callee][caller] += weight
- def collect(self, stack_frames):
- if stack_frames and hasattr(stack_frames[0], "awaited_by"):
- # Async frame processing
- for frames, thread_id, task_id in self._iter_async_frames(stack_frames):
- self._process_frames(frames)
- else:
- # Regular frame processing
- for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
- self._process_frames(frames)
+ def collect(self, stack_frames, timestamps_us=None):
+ weight = len(timestamps_us) if timestamps_us else 1
+ for frames, _ in self._iter_stacks(stack_frames, skip_idle=self.skip_idle):
+ self._process_frames(frames, weight=weight)
def export(self, filename):
self.create_stats()
from collections import deque
from _colorize import ANSIColors
+from .binary_collector import BinaryCollector
from .constants import (
PROFILING_MODE_WALL,
PROFILING_MODE_CPU,
if self.collect_stats:
self._print_unwinder_stats()
+ if isinstance(collector, BinaryCollector):
+ self._print_binary_stats(collector)
+
# Pass stats to flamegraph collector if it's the right type
if hasattr(collector, 'set_stats'):
collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode)
if stale_invalidations > 0:
print(f" {ANSIColors.YELLOW}Stale cache invalidations: {stale_invalidations}{ANSIColors.RESET}")
+ def _print_binary_stats(self, collector):
+ """Print binary I/O encoding statistics."""
+ try:
+ stats = collector.get_stats()
+ except (ValueError, RuntimeError):
+ return # Collector closed or stats unavailable
+
+ print(f" {ANSIColors.CYAN}Binary Encoding:{ANSIColors.RESET}")
+
+ repeat_records = stats.get('repeat_records', 0)
+ repeat_samples = stats.get('repeat_samples', 0)
+ full_records = stats.get('full_records', 0)
+ suffix_records = stats.get('suffix_records', 0)
+ pop_push_records = stats.get('pop_push_records', 0)
+ total_records = stats.get('total_records', 0)
+
+ if total_records > 0:
+ repeat_pct = repeat_records / total_records * 100
+ full_pct = full_records / total_records * 100
+ suffix_pct = suffix_records / total_records * 100
+ pop_push_pct = pop_push_records / total_records * 100
+ else:
+ repeat_pct = full_pct = suffix_pct = pop_push_pct = 0
+
+ print(f" Records: {total_records:,}")
+ print(f" RLE repeat: {repeat_records:,} ({ANSIColors.GREEN}{repeat_pct:.1f}%{ANSIColors.RESET}) [{repeat_samples:,} samples]")
+ print(f" Full stack: {full_records:,} ({full_pct:.1f}%)")
+ print(f" Suffix match: {suffix_records:,} ({suffix_pct:.1f}%)")
+ print(f" Pop-push: {pop_push_records:,} ({pop_push_pct:.1f}%)")
+
+ frames_written = stats.get('total_frames_written', 0)
+ frames_saved = stats.get('frames_saved', 0)
+ compression_pct = stats.get('frame_compression_pct', 0)
+
+ print(f" {ANSIColors.CYAN}Frame Efficiency:{ANSIColors.RESET}")
+ print(f" Frames written: {frames_written:,}")
+ print(f" Frames saved: {frames_saved:,} ({ANSIColors.GREEN}{compression_pct:.1f}%{ANSIColors.RESET})")
+
+ bytes_written = stats.get('bytes_written', 0)
+ if bytes_written >= 1024 * 1024:
+ bytes_str = f"{bytes_written / (1024 * 1024):.1f} MB"
+ elif bytes_written >= 1024:
+ bytes_str = f"{bytes_written / 1024:.1f} KB"
+ else:
+ bytes_str = f"{bytes_written} B"
+ print(f" Bytes (pre-zstd): {bytes_str}")
+
def _is_process_running(pid):
if pid <= 0:
self.sample_interval_usec = sample_interval_usec
self.skip_idle = skip_idle
- def collect(self, stack_frames, skip_idle=False):
- if stack_frames and hasattr(stack_frames[0], "awaited_by"):
- # Async-aware mode: process async task frames
- for frames, thread_id, task_id in self._iter_async_frames(stack_frames):
- if not frames:
- continue
- self.process_frames(frames, thread_id)
- else:
- # Sync-only mode
- for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle):
- if not frames:
- continue
- self.process_frames(frames, thread_id)
+ def collect(self, stack_frames, timestamps_us=None, skip_idle=False):
+ weight = len(timestamps_us) if timestamps_us else 1
+ for frames, thread_id in self._iter_stacks(stack_frames, skip_idle=skip_idle):
+ self.process_frames(frames, thread_id, weight=weight)
- def process_frames(self, frames, thread_id):
+ def process_frames(self, frames, thread_id, weight=1):
pass
super().__init__(*args, **kwargs)
self.stack_counter = collections.Counter()
- def process_frames(self, frames, thread_id):
+ def process_frames(self, frames, thread_id, weight=1):
# Extract only (filename, lineno, funcname) - opcode not needed for collapsed stacks
# frame is (filename, location, funcname, opcode)
call_tree = tuple(
(f[0], extract_lineno(f[1]), f[2]) for f in reversed(frames)
)
- self.stack_counter[(call_tree, thread_id)] += 1
+ self.stack_counter[(call_tree, thread_id)] += weight
def export(self, filename):
lines = []
# Per-thread statistics
self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, has_exception, total, gc_samples}}
- def collect(self, stack_frames, skip_idle=False):
+ def collect(self, stack_frames, timestamps_us=None, skip_idle=False):
"""Override to track thread status statistics before processing frames."""
- # Increment sample count once per sample
- self._sample_count += 1
+ # Weight is number of timestamps (samples with identical stack)
+ weight = len(timestamps_us) if timestamps_us else 1
+
+ # Increment sample count by weight
+ self._sample_count += weight
# Collect both aggregate and per-thread statistics using base method
status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames)
- # Merge aggregate status counts
+ # Merge aggregate status counts (multiply by weight)
for key in status_counts:
- self.thread_status_counts[key] += status_counts[key]
+ self.thread_status_counts[key] += status_counts[key] * weight
# Update aggregate GC frame count
if has_gc_frame:
- self.samples_with_gc_frames += 1
+ self.samples_with_gc_frames += weight
- # Merge per-thread statistics
+ # Merge per-thread statistics (multiply by weight)
for thread_id, stats in per_thread_stats.items():
if thread_id not in self.per_thread_stats:
self.per_thread_stats[thread_id] = {
"gc_samples": 0,
}
for key, value in stats.items():
- self.per_thread_stats[thread_id][key] += value
+ self.per_thread_stats[thread_id][key] += value * weight
# Call parent collect to process frames
- super().collect(stack_frames, skip_idle=skip_idle)
+ super().collect(stack_frames, timestamps_us, skip_idle=skip_idle)
def set_stats(self, sample_interval_usec, duration_sec, sample_rate,
error_rate=None, missed_samples=None, mode=None):
"opcode_mapping": opcode_mapping
}
- def process_frames(self, frames, thread_id):
+ def process_frames(self, frames, thread_id, weight=1):
"""Process stack frames into flamegraph tree structure.
Args:
leaf-to-root order. location is (lineno, end_lineno, col_offset, end_col_offset).
opcode is None if not gathered.
thread_id: Thread ID for this stack trace
+ weight: Number of samples this stack represents (for batched RLE)
"""
# Reverse to root->leaf order for tree building
- self._root["samples"] += 1
- self._total_samples += 1
+ self._root["samples"] += weight
+ self._total_samples += weight
self._root["threads"].add(thread_id)
self._all_threads.add(thread_id)
if node is None:
node = {"samples": 0, "children": {}, "threads": set(), "opcodes": collections.Counter()}
current["children"][func] = node
- node["samples"] += 1
+ node["samples"] += weight
node["threads"].add(thread_id)
if opcode is not None:
- node["opcodes"][opcode] += 1
+ node["opcodes"][opcode] += weight
current = node
--- /dev/null
+"""Tests for binary format round-trip functionality."""
+
+import os
+import random
+import tempfile
+import unittest
+from collections import defaultdict
+
+try:
+ import _remote_debugging
+ from _remote_debugging import (
+ InterpreterInfo,
+ ThreadInfo,
+ FrameInfo,
+ LocationInfo,
+ THREAD_STATUS_HAS_GIL,
+ THREAD_STATUS_ON_CPU,
+ THREAD_STATUS_UNKNOWN,
+ THREAD_STATUS_GIL_REQUESTED,
+ THREAD_STATUS_HAS_EXCEPTION,
+ )
+ from profiling.sampling.binary_collector import BinaryCollector
+ from profiling.sampling.binary_reader import BinaryReader
+
+ ZSTD_AVAILABLE = _remote_debugging.zstd_available()
+except ImportError:
+ raise unittest.SkipTest(
+ "Test only runs when _remote_debugging is available"
+ )
+
+
+def make_frame(filename, lineno, funcname):
+    """Create a FrameInfo struct sequence.
+
+    The LocationInfo tuple is (lineno, end_lineno, col_offset,
+    end_col_offset); column offsets are -1 (unavailable) and the frame's
+    opcode slot is None (not gathered).
+    """
+    location = LocationInfo((lineno, lineno, -1, -1))
+    return FrameInfo((filename, location, funcname, None))
+
+
+def make_thread(thread_id, frames, status=0):
+    """Create a ThreadInfo struct sequence.
+
+    status defaults to 0 (no THREAD_STATUS_* flags set).
+    """
+    return ThreadInfo((thread_id, status, frames))
+
+
+def make_interpreter(interp_id, threads):
+    """Create an InterpreterInfo struct sequence from an id and thread list."""
+    return InterpreterInfo((interp_id, threads))
+
+
+def extract_lineno(location):
+    """Extract line number from location (tuple or int or None).
+
+    Returns 0 when location is None, or when it is a tuple whose first
+    element is None; otherwise returns the tuple's first element or the
+    bare int itself.
+    """
+    if location is None:
+        return 0  # Treat None as 0
+    if isinstance(location, tuple):
+        return location[0] if location[0] is not None else 0
+    return location
+
+
+class RawCollector:
+    """Collector that captures all raw data grouped by thread.
+
+    Used as the replay target: BinaryReader.replay_samples() feeds it
+    decoded samples, which tests then compare against the original input.
+    """
+
+    def __init__(self):
+        # Key: (interpreter_id, thread_id) -> list of samples for that thread
+        self.by_thread = defaultdict(list)
+        # Total number of thread-samples seen across all collect() calls.
+        self.total_count = 0
+
+    def collect(self, stack_frames, timestamps_us):
+        """Capture the raw sample data."""
+        # timestamps_us is a list; add one sample per timestamp
+        count = len(timestamps_us)
+        for interp in stack_frames:
+            for thread in interp.threads:
+                frames = []
+                for frame in thread.frame_info:
+                    frames.append(
+                        {
+                            "filename": frame.filename,
+                            "funcname": frame.funcname,
+                            "lineno": extract_lineno(frame.location),
+                        }
+                    )
+                key = (interp.interpreter_id, thread.thread_id)
+                sample = {"status": thread.status, "frames": frames}
+                # NOTE: the same dict object is appended `count` times;
+                # safe here because samples are only read, never mutated.
+                for _ in range(count):
+                    self.by_thread[key].append(sample)
+                self.total_count += count
+
+    def export(self, filename):
+        # No-op: this collector only accumulates in memory.
+        pass
+
+
+def samples_to_by_thread(samples):
+    """Convert input samples to by-thread format for comparison.
+
+    Produces the same {(interpreter_id, thread_id): [sample, ...]} layout
+    that RawCollector builds, so expected and actual data can be compared
+    key-by-key and in order.
+    """
+    by_thread = defaultdict(list)
+    for sample in samples:
+        for interp in sample:
+            for thread in interp.threads:
+                frames = []
+                for frame in thread.frame_info:
+                    frames.append(
+                        {
+                            "filename": frame.filename,
+                            "funcname": frame.funcname,
+                            "lineno": extract_lineno(frame.location),
+                        }
+                    )
+                key = (interp.interpreter_id, thread.thread_id)
+                by_thread[key].append(
+                    {
+                        "status": thread.status,
+                        "frames": frames,
+                    }
+                )
+    return by_thread
+
+
+class BinaryFormatTestBase(unittest.TestCase):
+    """Base class with common setup/teardown for binary format tests.
+
+    Provides helpers to write samples through BinaryCollector, replay them
+    with BinaryReader, and assert exact per-thread equality.
+    """
+
+    def setUp(self):
+        # Paths created by create_binary_file(); removed in tearDown().
+        self.temp_files = []
+
+    def tearDown(self):
+        for f in self.temp_files:
+            if os.path.exists(f):
+                os.unlink(f)
+
+    def create_binary_file(self, samples, interval=1000, compression="none"):
+        """Create a test binary file and track it for cleanup."""
+        # Only the name is needed; the collector writes the contents.
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+        self.temp_files.append(filename)
+
+        collector = BinaryCollector(
+            filename, interval, compression=compression
+        )
+        for sample in samples:
+            collector.collect(sample)
+        collector.export(None)
+        return filename
+
+    def roundtrip(self, samples, interval=1000, compression="none"):
+        """Write samples to binary and read back.
+
+        Returns (RawCollector with replayed data, thread-sample count
+        reported by replay_samples).
+        """
+        filename = self.create_binary_file(samples, interval, compression)
+        collector = RawCollector()
+        with BinaryReader(filename) as reader:
+            count = reader.replay_samples(collector)
+        return collector, count
+
+    def assert_samples_equal(self, expected_samples, collector):
+        """Assert that roundtripped samples match input exactly, per-thread."""
+        expected = samples_to_by_thread(expected_samples)
+
+        # Same threads present
+        self.assertEqual(
+            set(expected.keys()),
+            set(collector.by_thread.keys()),
+            "Thread set mismatch",
+        )
+
+        # For each thread, samples match in order
+        for key in expected:
+            exp_samples = expected[key]
+            act_samples = collector.by_thread[key]
+            interp_id, thread_id = key
+
+            self.assertEqual(
+                len(exp_samples),
+                len(act_samples),
+                f"Thread ({interp_id}, {thread_id}): sample count mismatch "
+                f"(expected {len(exp_samples)}, got {len(act_samples)})",
+            )
+
+            # Compare status, frame count, then each frame field-by-field
+            # so a failure pinpoints the exact sample and frame.
+            for i, (exp, act) in enumerate(zip(exp_samples, act_samples)):
+                self.assertEqual(
+                    exp["status"],
+                    act["status"],
+                    f"Thread ({interp_id}, {thread_id}), sample {i}: "
+                    f"status mismatch (expected {exp['status']}, got {act['status']})",
+                )
+
+                self.assertEqual(
+                    len(exp["frames"]),
+                    len(act["frames"]),
+                    f"Thread ({interp_id}, {thread_id}), sample {i}: "
+                    f"frame count mismatch",
+                )
+
+                for j, (exp_frame, act_frame) in enumerate(
+                    zip(exp["frames"], act["frames"])
+                ):
+                    self.assertEqual(
+                        exp_frame["filename"],
+                        act_frame["filename"],
+                        f"Thread ({interp_id}, {thread_id}), sample {i}, "
+                        f"frame {j}: filename mismatch",
+                    )
+                    self.assertEqual(
+                        exp_frame["funcname"],
+                        act_frame["funcname"],
+                        f"Thread ({interp_id}, {thread_id}), sample {i}, "
+                        f"frame {j}: funcname mismatch",
+                    )
+                    self.assertEqual(
+                        exp_frame["lineno"],
+                        act_frame["lineno"],
+                        f"Thread ({interp_id}, {thread_id}), sample {i}, "
+                        f"frame {j}: lineno mismatch "
+                        f"(expected {exp_frame['lineno']}, got {act_frame['lineno']})",
+                    )
+
+
+class TestBinaryRoundTrip(BinaryFormatTestBase):
+    """Tests for exact binary format round-trip.
+
+    Each test writes synthetic samples through BinaryCollector, replays
+    them via BinaryReader into a RawCollector, and asserts the replayed
+    data matches the input exactly, per thread and in order.
+    """
+
+    def test_single_sample_single_frame(self):
+        """Single sample with one frame roundtrips exactly."""
+        samples = [
+            [
+                make_interpreter(
+                    0,
+                    [
+                        make_thread(
+                            12345, [make_frame("test.py", 42, "myfunc")]
+                        )
+                    ],
+                )
+            ]
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_single_sample_multi_frame(self):
+        """Single sample with call stack roundtrips exactly."""
+        # Frames are in leaf-to-root order.
+        frames = [
+            make_frame("inner.py", 10, "inner"),
+            make_frame("middle.py", 20, "middle"),
+            make_frame("outer.py", 30, "outer"),
+        ]
+        samples = [[make_interpreter(0, [make_thread(100, frames)])]]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_multiple_samples_same_stack(self):
+        """Multiple identical samples roundtrip exactly (tests RLE)."""
+        frame = make_frame("hot.py", 99, "hot_func")
+        samples = [
+            [make_interpreter(0, [make_thread(1, [frame])])]
+            for _ in range(100)
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 100)
+        self.assert_samples_equal(samples, collector)
+
+    def test_multiple_samples_varying_stacks(self):
+        """Multiple samples with varying stacks roundtrip exactly."""
+        samples = []
+        for i in range(20):
+            # Depth cycles 1..5; linenos vary with i so stacks differ.
+            depth = i % 5 + 1
+            frames = [
+                make_frame(f"f{j}.py", j * 10 + i, f"func{j}")
+                for j in range(depth)
+            ]
+            samples.append([make_interpreter(0, [make_thread(1, frames)])])
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 20)
+        self.assert_samples_equal(samples, collector)
+
+    def test_thread_ids_preserved(self):
+        """Thread IDs are preserved exactly."""
+        # Includes a value wider than 32 bits.
+        thread_ids = [1, 12345, 0x7FFF12345678, 999999]
+        samples = []
+        for tid in thread_ids:
+            samples.append(
+                [
+                    make_interpreter(
+                        0, [make_thread(tid, [make_frame("t.py", 10, "f")])]
+                    )
+                ]
+            )
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(thread_ids))
+        self.assert_samples_equal(samples, collector)
+
+    def test_interpreter_ids_preserved(self):
+        """Interpreter IDs are preserved exactly."""
+        interp_ids = [0, 1, 5, 100]
+        samples = []
+        for iid in interp_ids:
+            samples.append(
+                [
+                    make_interpreter(
+                        iid, [make_thread(1, [make_frame("i.py", 10, "f")])]
+                    )
+                ]
+            )
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(interp_ids))
+        self.assert_samples_equal(samples, collector)
+
+    def test_status_flags_preserved(self):
+        """All thread status flags are preserved exactly."""
+        # Single flags plus several OR-combinations.
+        statuses = [
+            0,
+            THREAD_STATUS_HAS_GIL,
+            THREAD_STATUS_ON_CPU,
+            THREAD_STATUS_UNKNOWN,
+            THREAD_STATUS_GIL_REQUESTED,
+            THREAD_STATUS_HAS_EXCEPTION,
+            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU,
+            THREAD_STATUS_HAS_GIL | THREAD_STATUS_HAS_EXCEPTION,
+            THREAD_STATUS_HAS_GIL
+            | THREAD_STATUS_ON_CPU
+            | THREAD_STATUS_GIL_REQUESTED,
+        ]
+        samples = []
+        for i, status in enumerate(statuses):
+            samples.append(
+                [
+                    make_interpreter(
+                        0,
+                        [
+                            make_thread(
+                                1, [make_frame("s.py", 10 + i, "f")], status
+                            )
+                        ],
+                    )
+                ]
+            )
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(statuses))
+        self.assert_samples_equal(samples, collector)
+
+    def test_multiple_threads_per_sample(self):
+        """Multiple threads in one sample roundtrip exactly."""
+        threads = [
+            make_thread(
+                1, [make_frame("t1.py", 10, "t1")], THREAD_STATUS_HAS_GIL
+            ),
+            make_thread(
+                2, [make_frame("t2.py", 20, "t2")], THREAD_STATUS_ON_CPU
+            ),
+            make_thread(3, [make_frame("t3.py", 30, "t3")], 0),
+        ]
+        samples = [[make_interpreter(0, threads)] for _ in range(10)]
+        collector, count = self.roundtrip(samples)
+        # 10 samples × 3 threads = 30 thread-samples
+        self.assertEqual(count, 30)
+        self.assert_samples_equal(samples, collector)
+
+    def test_multiple_interpreters_per_sample(self):
+        """Multiple interpreters in one sample roundtrip exactly."""
+        samples = [
+            [
+                make_interpreter(
+                    0, [make_thread(1, [make_frame("i0.py", 10, "i0")])]
+                ),
+                make_interpreter(
+                    1, [make_thread(2, [make_frame("i1.py", 20, "i1")])]
+                ),
+            ]
+            for _ in range(5)
+        ]
+        collector, count = self.roundtrip(samples)
+        # 5 samples × 2 interpreters × 1 thread = 10 thread-samples
+        self.assertEqual(count, 10)
+        self.assert_samples_equal(samples, collector)
+
+    def test_same_thread_id_different_interpreters(self):
+        """Same thread_id in different interpreters must be tracked separately."""
+        # This test catches bugs where thread state is keyed only by thread_id
+        # without considering interpreter_id
+        samples = []
+        # Interleave samples from interpreter 0 and 1, both using thread_id=1
+        for i in range(20):
+            interp_id = i % 2  # Alternate between interpreter 0 and 1
+            frame = make_frame(
+                f"interp{interp_id}.py", 10 + i, f"func{interp_id}"
+            )
+            samples.append(
+                [make_interpreter(interp_id, [make_thread(1, [frame])])]
+            )
+
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 20)
+        self.assert_samples_equal(samples, collector)
+
+        # Verify both interpreters are present
+        keys = set(collector.by_thread.keys())
+        self.assertIn((0, 1), keys)  # interpreter 0, thread 1
+        self.assertIn((1, 1), keys)  # interpreter 1, thread 1
+
+        # Verify each interpreter got 10 samples
+        self.assertEqual(len(collector.by_thread[(0, 1)]), 10)
+        self.assertEqual(len(collector.by_thread[(1, 1)]), 10)
+
+        # Verify the samples are in the right order for each interpreter
+        for i, sample in enumerate(collector.by_thread[(0, 1)]):
+            expected_lineno = 10 + i * 2  # 10, 12, 14, ...
+            self.assertEqual(sample["frames"][0]["lineno"], expected_lineno)
+            self.assertEqual(sample["frames"][0]["filename"], "interp0.py")
+
+        for i, sample in enumerate(collector.by_thread[(1, 1)]):
+            expected_lineno = 11 + i * 2  # 11, 13, 15, ...
+            self.assertEqual(sample["frames"][0]["lineno"], expected_lineno)
+            self.assertEqual(sample["frames"][0]["filename"], "interp1.py")
+
+    def test_deep_call_stack(self):
+        """Deep call stack roundtrips exactly."""
+        depth = 100
+        frames = [
+            make_frame(f"f{i}.py", i + 1, f"func{i}") for i in range(depth)
+        ]
+        samples = [[make_interpreter(0, [make_thread(1, frames)])]]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_line_numbers_preserved(self):
+        """Various line numbers are preserved exactly."""
+        # Spans small values through 16-bit boundary (65535) and beyond.
+        linenos = [1, 100, 1000, 65535, 100000]
+        samples = []
+        for lineno in linenos:
+            samples.append(
+                [
+                    make_interpreter(
+                        0, [make_thread(1, [make_frame("l.py", lineno, "f")])]
+                    )
+                ]
+            )
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(linenos))
+        self.assert_samples_equal(samples, collector)
+
+    @unittest.skipUnless(ZSTD_AVAILABLE, "zstd compression not available")
+    def test_zstd_compression_roundtrip(self):
+        """Zstd compressed data roundtrips exactly."""
+        samples = []
+        for i in range(200):
+            frames = [
+                make_frame(f"z{j}.py", j * 10 + i + 1, f"zfunc{j}")
+                for j in range(3)
+            ]
+            samples.append([make_interpreter(0, [make_thread(1, frames)])])
+        collector, count = self.roundtrip(samples, compression="zstd")
+        self.assertEqual(count, 200)
+        self.assert_samples_equal(samples, collector)
+
+    def test_sample_interval_preserved(self):
+        """Sample interval is preserved in file metadata."""
+        intervals = [100, 500, 1000, 5000, 10000]
+        for interval in intervals:
+            with self.subTest(interval=interval):
+                samples = [
+                    [
+                        make_interpreter(
+                            0, [make_thread(1, [make_frame("i.py", 1, "f")])]
+                        )
+                    ]
+                ]
+                filename = self.create_binary_file(samples, interval=interval)
+                with BinaryReader(filename) as reader:
+                    info = reader.get_info()
+                self.assertEqual(info["sample_interval_us"], interval)
+
+    def test_threads_interleaved_samples(self):
+        """Multiple threads with interleaved varying samples."""
+        samples = []
+        for i in range(30):
+            # Statuses flip on different periods (2 and 3) so the two
+            # threads' status sequences are not in lockstep.
+            threads = [
+                make_thread(
+                    1,
+                    [make_frame("t1.py", 10 + i, "t1")],
+                    THREAD_STATUS_HAS_GIL if i % 2 == 0 else 0,
+                ),
+                make_thread(
+                    2,
+                    [make_frame("t2.py", 20 + i, "t2")],
+                    THREAD_STATUS_ON_CPU if i % 3 == 0 else 0,
+                ),
+            ]
+            samples.append([make_interpreter(0, threads)])
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 60)
+        self.assert_samples_equal(samples, collector)
+
+
+class TestBinaryEdgeCases(BinaryFormatTestBase):
+    """Tests for edge cases in binary format.
+
+    Covers string-content edge cases (unicode, special characters, long
+    and empty names), large inputs, and reader lifecycle errors.
+    """
+
+    def test_unicode_filenames(self):
+        """Unicode filenames roundtrip exactly."""
+        # Cyrillic, Chinese, Japanese, and accented-Latin paths.
+        filenames = [
+            "/путь/файл.py",
+            "/路径/文件.py",
+            "/パス/ファイル.py",
+            "/chemin/café.py",
+        ]
+        for fname in filenames:
+            with self.subTest(filename=fname):
+                samples = [
+                    [
+                        make_interpreter(
+                            0, [make_thread(1, [make_frame(fname, 1, "func")])]
+                        )
+                    ]
+                ]
+                collector, count = self.roundtrip(samples)
+                self.assertEqual(count, 1)
+                self.assert_samples_equal(samples, collector)
+
+    def test_unicode_funcnames(self):
+        """Unicode function names roundtrip exactly."""
+        funcnames = [
+            "функция",
+            "函数",
+            "関数",
+            "función",
+        ]
+        for funcname in funcnames:
+            with self.subTest(funcname=funcname):
+                samples = [
+                    [
+                        make_interpreter(
+                            0,
+                            [
+                                make_thread(
+                                    1, [make_frame("test.py", 1, funcname)]
+                                )
+                            ],
+                        )
+                    ]
+                ]
+                collector, count = self.roundtrip(samples)
+                self.assertEqual(count, 1)
+                self.assert_samples_equal(samples, collector)
+
+    def test_special_char_filenames(self):
+        """Filenames with special characters roundtrip exactly."""
+        filenames = [
+            "/path/with spaces/file.py",
+            "/path/with\ttab/file.py",
+            "/path/with'quote/file.py",
+            '/path/with"double/file.py',
+            "/path/with\\backslash/file.py",
+        ]
+        for fname in filenames:
+            with self.subTest(filename=fname):
+                samples = [
+                    [
+                        make_interpreter(
+                            0, [make_thread(1, [make_frame(fname, 1, "func")])]
+                        )
+                    ]
+                ]
+                collector, count = self.roundtrip(samples)
+                self.assertEqual(count, 1)
+                self.assert_samples_equal(samples, collector)
+
+    def test_special_funcnames(self):
+        """Function names with special characters roundtrip exactly."""
+        # Includes synthetic names the interpreter generates.
+        funcnames = [
+            "<lambda>",
+            "<listcomp>",
+            "<genexpr>",
+            "<module>",
+            "__init__",
+            "func.inner",
+        ]
+        for funcname in funcnames:
+            with self.subTest(funcname=funcname):
+                samples = [
+                    [
+                        make_interpreter(
+                            0,
+                            [
+                                make_thread(
+                                    1, [make_frame("test.py", 1, funcname)]
+                                )
+                            ],
+                        )
+                    ]
+                ]
+                collector, count = self.roundtrip(samples)
+                self.assertEqual(count, 1)
+                self.assert_samples_equal(samples, collector)
+
+    def test_long_filename(self):
+        """Long filename roundtrips exactly."""
+        long_file = "/very/long/path/" + "sub/" * 50 + "file.py"
+        samples = [
+            [
+                make_interpreter(
+                    0, [make_thread(1, [make_frame(long_file, 1, "func")])]
+                )
+            ]
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_long_funcname(self):
+        """Long function name roundtrips exactly."""
+        long_func = "very_long_function_name_" + "x" * 200
+        samples = [
+            [
+                make_interpreter(
+                    0, [make_thread(1, [make_frame("test.py", 1, long_func)])]
+                )
+            ]
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_empty_funcname(self):
+        """Empty function name roundtrips exactly."""
+        samples = [
+            [
+                make_interpreter(
+                    0, [make_thread(1, [make_frame("test.py", 1, "")])]
+                )
+            ]
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    @unittest.skipUnless(ZSTD_AVAILABLE, "zstd compression not available")
+    def test_large_sample_count(self):
+        """Large number of samples roundtrips exactly."""
+        num = 5000
+        # Linenos cycle 1..100 so stacks both repeat and vary.
+        samples = [
+            [
+                make_interpreter(
+                    0,
+                    [
+                        make_thread(
+                            1, [make_frame("test.py", (i % 100) + 1, "func")]
+                        )
+                    ],
+                )
+            ]
+            for i in range(num)
+        ]
+        collector, count = self.roundtrip(samples, compression="zstd")
+        self.assertEqual(count, num)
+        self.assert_samples_equal(samples, collector)
+
+    def test_context_manager_cleanup(self):
+        """Reader cleans up on context exit."""
+        samples = [
+            [
+                make_interpreter(
+                    0, [make_thread(1, [make_frame("t.py", 1, "f")])]
+                )
+            ]
+        ]
+        filename = self.create_binary_file(samples)
+        reader = BinaryReader(filename)
+        with reader:
+            collector = RawCollector()
+            count = reader.replay_samples(collector)
+            self.assertEqual(count, 1)
+        # After the context exits, further replay attempts must fail.
+        with self.assertRaises(RuntimeError):
+            reader.replay_samples(collector)
+
+    def test_invalid_file_path(self):
+        """Invalid file path raises appropriate error."""
+        with self.assertRaises((FileNotFoundError, OSError, ValueError)):
+            with BinaryReader("/nonexistent/path/file.bin") as reader:
+                reader.replay_samples(RawCollector())
+
+
+class TestBinaryEncodings(BinaryFormatTestBase):
+    """Tests specifically targeting different stack encodings.
+
+    Each test constructs sample sequences shaped to exercise one stack
+    record encoding (full, RLE repeat, suffix match, pop-push) and
+    verifies an exact roundtrip.
+    """
+
+    def test_stack_full_encoding(self):
+        """First sample uses STACK_FULL encoding and roundtrips."""
+        frames = [make_frame(f"f{i}.py", i + 1, f"func{i}") for i in range(5)]
+        samples = [[make_interpreter(0, [make_thread(1, frames)])]]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1)
+        self.assert_samples_equal(samples, collector)
+
+    def test_stack_repeat_encoding(self):
+        """Identical consecutive samples use RLE and roundtrip."""
+        frame = make_frame("repeat.py", 42, "repeat_func")
+        samples = [
+            [make_interpreter(0, [make_thread(1, [frame])])]
+            for _ in range(1000)
+        ]
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 1000)
+        self.assert_samples_equal(samples, collector)
+
+    def test_stack_suffix_encoding(self):
+        """Samples sharing suffix use STACK_SUFFIX and roundtrip."""
+        samples = []
+        for i in range(10):
+            # One new leaf frame per sample on top of a shared 5-frame tail.
+            frames = [make_frame(f"new{i}.py", i + 1, f"new{i}")]
+            frames.extend(
+                [
+                    make_frame(f"shared{j}.py", j + 1, f"shared{j}")
+                    for j in range(5)
+                ]
+            )
+            samples.append([make_interpreter(0, [make_thread(1, frames)])])
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 10)
+        self.assert_samples_equal(samples, collector)
+
+    def test_stack_pop_push_encoding(self):
+        """Samples with pop+push pattern roundtrip."""
+        # Simulates call/return: base -> call1 -> call2 -> call1 -> base,
+        # with call1 resuming at a different line (25) after the return.
+        samples = []
+        base_frames = [make_frame("base.py", 10, "base")]
+
+        # Call deeper
+        samples.append([make_interpreter(0, [make_thread(1, base_frames)])])
+        samples.append(
+            [
+                make_interpreter(
+                    0,
+                    [
+                        make_thread(
+                            1,
+                            [make_frame("call1.py", 20, "call1")]
+                            + base_frames,
+                        )
+                    ],
+                )
+            ]
+        )
+        samples.append(
+            [
+                make_interpreter(
+                    0,
+                    [
+                        make_thread(
+                            1,
+                            [
+                                make_frame("call2.py", 30, "call2"),
+                                make_frame("call1.py", 20, "call1"),
+                            ]
+                            + base_frames,
+                        )
+                    ],
+                )
+            ]
+        )
+        # Return
+        samples.append(
+            [
+                make_interpreter(
+                    0,
+                    [
+                        make_thread(
+                            1,
+                            [make_frame("call1.py", 25, "call1")]
+                            + base_frames,
+                        )
+                    ],
+                )
+            ]
+        )
+        samples.append([make_interpreter(0, [make_thread(1, base_frames)])])
+
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 5)
+        self.assert_samples_equal(samples, collector)
+
+    def test_mixed_encodings(self):
+        """Mix of different encoding patterns roundtrips."""
+        samples = []
+        # Some repeated samples (RLE)
+        frame1 = make_frame("hot.py", 1, "hot")
+        for _ in range(20):
+            samples.append([make_interpreter(0, [make_thread(1, [frame1])])])
+        # Some varying samples
+        for i in range(20):
+            frames = [make_frame(f"vary{i}.py", i + 1, f"vary{i}")]
+            samples.append([make_interpreter(0, [make_thread(1, frames)])])
+        # More repeated
+        for _ in range(20):
+            samples.append([make_interpreter(0, [make_thread(1, [frame1])])])
+
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, 60)
+        self.assert_samples_equal(samples, collector)
+
+    def test_alternating_threads_status_changes(self):
+        """Alternating thread status changes roundtrip correctly."""
+        samples = []
+        for i in range(50):
+            # Two threads whose statuses change on different periods.
+            status1 = THREAD_STATUS_HAS_GIL if i % 2 == 0 else 0
+            status2 = (
+                THREAD_STATUS_ON_CPU if i % 3 == 0 else THREAD_STATUS_HAS_GIL
+            )
+            threads = [
+                make_thread(1, [make_frame("t1.py", 10, "t1")], status1),
+                make_thread(2, [make_frame("t2.py", 20, "t2")], status2),
+            ]
+            samples.append([make_interpreter(0, threads)])
+        collector, count = self.roundtrip(samples)
+        # 50 samples × 2 threads = 100 thread-samples
+        self.assertEqual(count, 100)
+        self.assert_samples_equal(samples, collector)
+
+
+class TestBinaryStress(BinaryFormatTestBase):
+    """Randomized stress tests for binary format.
+
+    All tests seed the RNG so failures are reproducible; the exact sequence
+    of random calls is part of each test's fixture and must not be reordered.
+    """
+
+    @unittest.skipUnless(ZSTD_AVAILABLE, "zstd compression not available")
+    def test_random_samples_stress(self):
+        """Stress test with random samples - exercises hash table resizing."""
+        random.seed(42)  # Reproducible
+
+        # Large pools to force hash table resizing (exceeds initial 8192/4096 sizes)
+        filenames = [f"file{i}.py" for i in range(200)]
+        funcnames = [f"func{i}" for i in range(300)]
+        thread_ids = list(range(1, 50))
+        interp_ids = list(range(10))
+        statuses = [
+            0,
+            THREAD_STATUS_HAS_GIL,
+            THREAD_STATUS_ON_CPU,
+            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU,
+            THREAD_STATUS_HAS_EXCEPTION,
+        ]
+
+        samples = []
+        for _ in range(1000):
+            num_interps = random.randint(1, 3)
+            interps = []
+            for _ in range(num_interps):
+                iid = random.choice(interp_ids)
+                num_threads = random.randint(1, 5)
+                threads = []
+                for _ in range(num_threads):
+                    tid = random.choice(thread_ids)
+                    status = random.choice(statuses)
+                    depth = random.randint(1, 15)
+                    frames = []
+                    for _ in range(depth):
+                        fname = random.choice(filenames)
+                        func = random.choice(funcnames)
+                        # Wide line number range to create many unique frames
+                        lineno = random.randint(1, 5000)
+                        frames.append(make_frame(fname, lineno, func))
+                    threads.append(make_thread(tid, frames, status))
+                interps.append(make_interpreter(iid, threads))
+            samples.append(interps)
+
+        # Random stacks rarely repeat, so the exact sample count depends on
+        # how the writer batches threads; only require a non-empty roundtrip.
+        collector, count = self.roundtrip(samples, compression="zstd")
+        self.assertGreater(count, 0)
+        self.assert_samples_equal(samples, collector)
+
+    def test_rle_stress(self):
+        """Stress test RLE encoding with identical samples."""
+        random.seed(123)
+
+        # Create a few distinct stacks
+        # NOTE(review): the loop index is unused; only the iteration count
+        # (5 distinct stacks) matters here.
+        stacks = []
+        for i in range(5):
+            depth = random.randint(1, 8)
+            frames = [
+                make_frame(f"rle{j}.py", j * 10, f"rle{j}")
+                for j in range(depth)
+            ]
+            stacks.append(frames)
+
+        # Generate samples with repeated stacks (should trigger RLE)
+        samples = []
+        for _ in range(100):
+            stack = random.choice(stacks)
+            repeat = random.randint(1, 50)
+            for _ in range(repeat):
+                samples.append([make_interpreter(0, [make_thread(1, stack)])])
+
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(samples))
+        self.assert_samples_equal(samples, collector)
+
+    @unittest.skipUnless(ZSTD_AVAILABLE, "zstd compression not available")
+    def test_multi_thread_stress(self):
+        """Stress test with many threads and interleaved samples."""
+        random.seed(456)
+
+        thread_ids = list(range(1, 20))
+        samples = []
+
+        for i in range(300):
+            # Randomly select 1-5 threads for this sample
+            num_threads = random.randint(1, 5)
+            selected = random.sample(thread_ids, num_threads)
+            threads = []
+            for tid in selected:
+                status = random.choice(
+                    [0, THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU]
+                )
+                depth = random.randint(1, 5)
+                # Line numbers vary with the sample index so per-thread
+                # stacks keep changing between samples.
+                frames = [
+                    make_frame(f"mt{tid}_{j}.py", i + j, f"f{j}")
+                    for j in range(depth)
+                ]
+                threads.append(make_thread(tid, frames, status))
+            samples.append([make_interpreter(0, threads)])
+
+        collector, count = self.roundtrip(samples, compression="zstd")
+        self.assertGreater(count, 0)
+        self.assert_samples_equal(samples, collector)
+
+    def test_encoding_transitions_stress(self):
+        """Stress test stack encoding transitions."""
+        random.seed(789)
+
+        base_frames = [
+            make_frame(f"base{i}.py", i, f"base{i}") for i in range(5)
+        ]
+        samples = []
+
+        # Each iteration picks one of four stack shapes so the writer is
+        # forced to switch between FULL, REPEAT, SUFFIX and POP_PUSH records.
+        for i in range(200):
+            choice = random.randint(0, 4)
+            if choice == 0:
+                # Full new stack
+                depth = random.randint(1, 8)
+                frames = [
+                    make_frame(f"new{i}_{j}.py", j, f"new{j}")
+                    for j in range(depth)
+                ]
+            elif choice == 1:
+                # Repeat previous (will use RLE if identical)
+                frames = base_frames[: random.randint(1, 5)]
+            elif choice == 2:
+                # Add frames on top (suffix encoding)
+                extra = random.randint(1, 3)
+                frames = [
+                    make_frame(f"top{i}_{j}.py", j, f"top{j}")
+                    for j in range(extra)
+                ]
+                frames.extend(base_frames[: random.randint(2, 4)])
+            else:
+                # Pop and push (pop-push encoding)
+                keep = random.randint(1, 3)
+                push = random.randint(0, 2)
+                frames = [
+                    make_frame(f"push{i}_{j}.py", j, f"push{j}")
+                    for j in range(push)
+                ]
+                frames.extend(base_frames[:keep])
+
+            samples.append([make_interpreter(0, [make_thread(1, frames)])])
+
+        collector, count = self.roundtrip(samples)
+        self.assertEqual(count, len(samples))
+        self.assert_samples_equal(samples, collector)
+
+    @unittest.skipUnless(ZSTD_AVAILABLE, "zstd compression not available")
+    def test_same_thread_id_multiple_interpreters_stress(self):
+        """Stress test: same thread_id across multiple interpreters with interleaved samples.
+
+        This test catches bugs where thread state is keyed only by thread_id
+        without considering interpreter_id (both in writer and reader).
+        """
+        random.seed(999)
+
+        # Multiple interpreters, each with overlapping thread_ids
+        interp_ids = [0, 1, 2, 3]
+        # Same thread_ids used across all interpreters
+        shared_thread_ids = [1, 2, 3]
+
+        filenames = [f"file{i}.py" for i in range(10)]
+        funcnames = [f"func{i}" for i in range(15)]
+        statuses = [0, THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU]
+
+        samples = []
+        for i in range(1000):
+            # Randomly pick an interpreter
+            iid = random.choice(interp_ids)
+            # Randomly pick 1-3 threads (from shared pool)
+            num_threads = random.randint(1, 3)
+            selected_tids = random.sample(shared_thread_ids, num_threads)
+
+            threads = []
+            for tid in selected_tids:
+                status = random.choice(statuses)
+                depth = random.randint(1, 6)
+                frames = []
+                for d in range(depth):
+                    # Include interpreter and thread info in frame data for verification
+                    fname = f"i{iid}_t{tid}_{random.choice(filenames)}"
+                    func = random.choice(funcnames)
+                    lineno = i * 10 + d + 1  # Unique per sample
+                    frames.append(make_frame(fname, lineno, func))
+                threads.append(make_thread(tid, frames, status))
+
+            samples.append([make_interpreter(iid, threads)])
+
+        collector, count = self.roundtrip(samples, compression="zstd")
+        self.assertGreater(count, 0)
+        self.assert_samples_equal(samples, collector)
+
+        # Verify that we have samples from multiple (interpreter, thread) combinations
+        # with the same thread_id
+        keys = set(collector.by_thread.keys())
+        # Should have samples for same thread_id in different interpreters
+        for tid in shared_thread_ids:
+            interps_with_tid = [iid for (iid, t) in keys if t == tid]
+            self.assertGreater(
+                len(interps_with_tid),
+                1,
+                f"Thread {tid} should appear in multiple interpreters",
+            )
+
+
+class TimestampCollector:
+ """Collector that captures timestamps for verification."""
+
+ def __init__(self):
+ self.all_timestamps = []
+
+ def collect(self, stack_frames, timestamps_us=None):
+ if timestamps_us is not None:
+ self.all_timestamps.extend(timestamps_us)
+
+ def export(self, filename):
+ pass
+
+
+class TestTimestampPreservation(BinaryFormatTestBase):
+    """Tests for timestamp preservation during binary round-trip."""
+
+    def test_timestamp_preservation(self):
+        """Timestamps are preserved during round-trip."""
+        frame = make_frame("test.py", 10, "func")
+        timestamps = [1000000, 2000000, 3000000]
+
+        # delete=False so the path survives the with-block for the writer to
+        # reopen; cleanup presumably happens via self.temp_files in the base
+        # class tearDown - see BinaryFormatTestBase.
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+            self.temp_files.append(filename)
+
+        # Write one sample per timestamp, uncompressed for simplicity.
+        collector = BinaryCollector(filename, 1000, compression="none")
+        for ts in timestamps:
+            sample = [make_interpreter(0, [make_thread(1, [frame])])]
+            collector.collect(sample, timestamp_us=ts)
+        collector.export(None)
+
+        # Replay through a collector that records only the timestamps.
+        ts_collector = TimestampCollector()
+        with BinaryReader(filename) as reader:
+            count = reader.replay_samples(ts_collector)
+
+        self.assertEqual(count, 3)
+        self.assertEqual(ts_collector.all_timestamps, timestamps)
+
+    def test_timestamp_preservation_with_rle(self):
+        """RLE-batched samples preserve individual timestamps."""
+        frame = make_frame("rle.py", 42, "rle_func")
+
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+            self.temp_files.append(filename)
+
+        # Identical samples (triggers RLE) with different timestamps
+        collector = BinaryCollector(filename, 1000, compression="none")
+        expected_timestamps = []
+        for i in range(50):
+            ts = 1000000 + i * 100
+            expected_timestamps.append(ts)
+            sample = [make_interpreter(0, [make_thread(1, [frame])])]
+            collector.collect(sample, timestamp_us=ts)
+        collector.export(None)
+
+        # Even though identical stacks may be batched into one RLE record,
+        # each replayed sample must carry its own distinct timestamp.
+        ts_collector = TimestampCollector()
+        with BinaryReader(filename) as reader:
+            count = reader.replay_samples(ts_collector)
+
+        self.assertEqual(count, 50)
+        self.assertEqual(ts_collector.all_timestamps, expected_timestamps)
+
+
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+Add binary output format to :mod:`profiling.sampling` for compact storage of
+profiling data. The new ``--binary`` option captures samples to a file that
+can be converted to other formats using the ``replay`` command. Patch by
+Pablo Galindo.
@MODULE__PICKLE_TRUE@_pickle _pickle.c
@MODULE__QUEUE_TRUE@_queue _queuemodule.c
@MODULE__RANDOM_TRUE@_random _randommodule.c
-@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c _remote_debugging/subprocess.c
+@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c _remote_debugging/binary_io_writer.c _remote_debugging/binary_io_reader.c _remote_debugging/subprocess.c
@MODULE__STRUCT_TRUE@_struct _struct.c
# build supports subinterpreters
#define Py_REMOTE_DEBUGGING_H
/* _GNU_SOURCE must be defined before any system headers */
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE_BUILTIN
+# ifndef Py_BUILD_CORE_MODULE
# define Py_BUILD_CORE_MODULE 1
+# endif
#endif
#include "Python.h"
PyTypeObject *ThreadInfo_Type;
PyTypeObject *InterpreterInfo_Type;
PyTypeObject *AwaitedInfo_Type;
+ PyTypeObject *BinaryWriter_Type;
+ PyTypeObject *BinaryReader_Type;
} RemoteDebuggingState;
enum _ThreadState {
--- /dev/null
+/******************************************************************************
+ * Python Remote Debugging Module - Binary I/O Header
+ *
+ * This header provides declarations for high-performance binary file I/O
+ * for profiling data with optional zstd streaming compression.
+ ******************************************************************************/
+
+#ifndef Py_BINARY_IO_H
+#define Py_BINARY_IO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "Python.h"
+#include "pycore_hashtable.h"
+#include <stdint.h>
+#include <stdio.h>
+
+/* ============================================================================
+ * BINARY FORMAT CONSTANTS
+ * ============================================================================ */
+
+#define BINARY_FORMAT_MAGIC 0x54414348 /* "TACH" (Tachyon) in native byte order */
+#define BINARY_FORMAT_MAGIC_SWAPPED 0x48434154 /* Byte-swapped magic for endianness detection */
+#define BINARY_FORMAT_VERSION 1
+
+/* Conditional byte-swap macros for cross-endian file reading.
+ * Uses Python's optimized byte-swap functions from pycore_bitutils.h */
+#define SWAP16_IF(swap, x) ((swap) ? _Py_bswap16(x) : (x))
+#define SWAP32_IF(swap, x) ((swap) ? _Py_bswap32(x) : (x))
+#define SWAP64_IF(swap, x) ((swap) ? _Py_bswap64(x) : (x))
+
+/* Header field offsets and sizes */
+#define HDR_OFF_MAGIC 0
+#define HDR_SIZE_MAGIC 4
+#define HDR_OFF_VERSION (HDR_OFF_MAGIC + HDR_SIZE_MAGIC)
+#define HDR_SIZE_VERSION 4
+#define HDR_OFF_PY_VERSION (HDR_OFF_VERSION + HDR_SIZE_VERSION)
+#define HDR_SIZE_PY_VERSION 4 /* 3 bytes: major, minor, micro + 1 reserved */
+#define HDR_OFF_PY_MAJOR HDR_OFF_PY_VERSION
+#define HDR_OFF_PY_MINOR (HDR_OFF_PY_VERSION + 1)
+#define HDR_OFF_PY_MICRO (HDR_OFF_PY_VERSION + 2)
+#define HDR_OFF_START_TIME (HDR_OFF_PY_VERSION + HDR_SIZE_PY_VERSION)
+#define HDR_SIZE_START_TIME 8
+#define HDR_OFF_INTERVAL (HDR_OFF_START_TIME + HDR_SIZE_START_TIME)
+#define HDR_SIZE_INTERVAL 8
+#define HDR_OFF_SAMPLES (HDR_OFF_INTERVAL + HDR_SIZE_INTERVAL)
+#define HDR_SIZE_SAMPLES 4
+#define HDR_OFF_THREADS (HDR_OFF_SAMPLES + HDR_SIZE_SAMPLES)
+#define HDR_SIZE_THREADS 4
+#define HDR_OFF_STR_TABLE (HDR_OFF_THREADS + HDR_SIZE_THREADS)
+#define HDR_SIZE_STR_TABLE 8
+#define HDR_OFF_FRAME_TABLE (HDR_OFF_STR_TABLE + HDR_SIZE_STR_TABLE)
+#define HDR_SIZE_FRAME_TABLE 8
+#define HDR_OFF_COMPRESSION (HDR_OFF_FRAME_TABLE + HDR_SIZE_FRAME_TABLE)
+#define HDR_SIZE_COMPRESSION 4
+#define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION)
+#define FILE_HEADER_PLACEHOLDER_SIZE 64
+
+static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE,
+ "FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE");
+
+/* Buffer sizes: 512KB balances syscall amortization against memory use,
+ * and aligns well with filesystem block sizes and zstd dictionary windows */
+#define WRITE_BUFFER_SIZE (512 * 1024)
+#define COMPRESSED_BUFFER_SIZE (512 * 1024)
+
+/* Compression types */
+#define COMPRESSION_NONE 0
+#define COMPRESSION_ZSTD 1
+
+/* Stack encoding types for delta compression */
+#define STACK_REPEAT 0x00 /* RLE: identical to previous, with count */
+#define STACK_FULL 0x01 /* Full stack (first sample or no match) */
+#define STACK_SUFFIX 0x02 /* Shares N frames from bottom */
+#define STACK_POP_PUSH 0x03 /* Remove M frames, add N frames */
+
+/* Maximum stack depth we'll buffer for delta encoding */
+#define MAX_STACK_DEPTH 256
+
+/* Initial capacity for RLE pending buffer */
+#define INITIAL_RLE_CAPACITY 64
+
+/* Initial capacities for dynamic arrays - sized to reduce reallocations */
+#define INITIAL_STRING_CAPACITY 4096
+#define INITIAL_FRAME_CAPACITY 4096
+#define INITIAL_THREAD_CAPACITY 256
+
+/* ============================================================================
+ * STATISTICS STRUCTURES
+ * ============================================================================ */
+
+/* Writer statistics - tracks encoding efficiency */
+typedef struct {
+ uint64_t repeat_records; /* Number of RLE repeat records written */
+ uint64_t repeat_samples; /* Total samples encoded via RLE */
+ uint64_t full_records; /* Number of full stack records */
+ uint64_t suffix_records; /* Number of suffix match records */
+ uint64_t pop_push_records; /* Number of pop-push records */
+ uint64_t total_frames_written;/* Total frame indices written */
+ uint64_t frames_saved; /* Frames avoided due to delta encoding */
+ uint64_t bytes_written; /* Total bytes written (before compression) */
+} BinaryWriterStats;
+
+/* Reader statistics - tracks reconstruction performance */
+typedef struct {
+ uint64_t repeat_records; /* RLE records decoded */
+ uint64_t repeat_samples; /* Samples decoded from RLE */
+ uint64_t full_records; /* Full stack records decoded */
+ uint64_t suffix_records; /* Suffix match records decoded */
+ uint64_t pop_push_records; /* Pop-push records decoded */
+ uint64_t total_samples; /* Total samples reconstructed */
+ uint64_t stack_reconstructions; /* Number of stack array reconstructions */
+} BinaryReaderStats;
+
+/* ============================================================================
+ * PLATFORM ABSTRACTION
+ * ============================================================================ */
+
+#if defined(__linux__) || defined(__APPLE__)
+ #include <sys/mman.h>
+ #include <unistd.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #define USE_MMAP 1
+#else
+ #define USE_MMAP 0
+#endif
+
+/* 64-bit file position support for files larger than 2GB.
+ * On POSIX: use ftello/fseeko with off_t (already 64-bit on 64-bit systems)
+ * On Windows: use _ftelli64/_fseeki64 with __int64 */
+#if defined(_WIN32) || defined(_WIN64)
+ #include <io.h>
+ typedef __int64 file_offset_t;
+ #define FTELL64(fp) _ftelli64(fp)
+ #define FSEEK64(fp, offset, whence) _fseeki64(fp, offset, whence)
+#else
+ /* POSIX - off_t is 64-bit on 64-bit systems, ftello/fseeko handle large files */
+ typedef off_t file_offset_t;
+ #define FTELL64(fp) ftello(fp)
+ #define FSEEK64(fp, offset, whence) fseeko(fp, offset, whence)
+#endif
+
+/* Forward declare zstd types if available */
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
+
+/* Branch prediction hints - same as Objects/obmalloc.c */
+#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
+# define UNLIKELY(value) __builtin_expect((value), 0)
+# define LIKELY(value) __builtin_expect((value), 1)
+#else
+# define UNLIKELY(value) (value)
+# define LIKELY(value) (value)
+#endif
+
+/* ============================================================================
+ * BINARY WRITER STRUCTURES
+ * ============================================================================ */
+
+/* zstd compression state (only used if HAVE_ZSTD defined) */
+typedef struct {
+#ifdef HAVE_ZSTD
+ ZSTD_CCtx *cctx; /* Modern API: CCtx and CStream are the same since v1.3.0 */
+#else
+ void *cctx; /* Placeholder */
+#endif
+ uint8_t *compressed_buffer;
+ size_t compressed_buffer_size;
+} ZstdCompressor;
+
+/* Frame entry - combines all frame data for better cache locality */
+typedef struct {
+ uint32_t filename_idx;
+ uint32_t funcname_idx;
+ int32_t lineno;
+} FrameEntry;
+
+/* Frame key for hash table lookup */
+typedef struct {
+ uint32_t filename_idx;
+ uint32_t funcname_idx;
+ int32_t lineno;
+} FrameKey;
+
+/* Pending RLE sample - buffered for run-length encoding */
+typedef struct {
+ uint64_t timestamp_delta;
+ uint8_t status;
+} PendingRLESample;
+
+/* Thread entry - tracks per-thread state for delta encoding */
+typedef struct {
+ uint64_t thread_id;
+ uint64_t prev_timestamp;
+ uint32_t interpreter_id;
+
+ /* Previous stack for delta encoding (frame indices, innermost first) */
+ uint32_t *prev_stack;
+ size_t prev_stack_depth;
+ size_t prev_stack_capacity;
+
+ /* RLE pending buffer - samples waiting to be written as a repeat group */
+ PendingRLESample *pending_rle;
+ size_t pending_rle_count;
+ size_t pending_rle_capacity;
+ int has_pending_rle; /* Flag: do we have buffered repeats? */
+} ThreadEntry;
+
+/* Main binary writer structure */
+typedef struct {
+ FILE *fp;
+ char *filename;
+
+ /* Write buffer for batched I/O */
+ uint8_t *write_buffer;
+ size_t buffer_pos;
+ size_t buffer_size;
+
+ /* Compression */
+ int compression_type;
+ ZstdCompressor zstd;
+
+ /* Metadata */
+ uint64_t start_time_us;
+ uint64_t sample_interval_us;
+ uint32_t total_samples;
+
+ /* String hash table: PyObject* -> uint32_t index */
+ _Py_hashtable_t *string_hash;
+ /* String storage: array of UTF-8 encoded strings */
+ char **strings;
+ size_t *string_lengths;
+ size_t string_count;
+ size_t string_capacity;
+
+ /* Frame hash table: FrameKey* -> uint32_t index */
+ _Py_hashtable_t *frame_hash;
+ /* Frame storage: combined struct for better cache locality */
+ FrameEntry *frame_entries;
+ size_t frame_count;
+ size_t frame_capacity;
+
+ /* Thread timestamp tracking for delta encoding - combined for cache locality */
+ ThreadEntry *thread_entries;
+ size_t thread_count;
+ size_t thread_capacity;
+
+ /* Statistics */
+ BinaryWriterStats stats;
+} BinaryWriter;
+
+/* ============================================================================
+ * BINARY READER STRUCTURES
+ * ============================================================================ */
+
+/* Per-thread state for stack reconstruction during replay */
+typedef struct {
+ uint64_t thread_id;
+ uint32_t interpreter_id;
+ uint64_t prev_timestamp;
+
+ /* Reconstructed stack buffer (frame indices, innermost first) */
+ uint32_t *current_stack;
+ size_t current_stack_depth;
+ size_t current_stack_capacity;
+} ReaderThreadState;
+
+/* Main binary reader structure */
+typedef struct {
+ char *filename;
+
+#if USE_MMAP
+ int fd;
+ uint8_t *mapped_data;
+ size_t mapped_size;
+#else
+ FILE *fp;
+ uint8_t *file_data;
+ size_t file_size;
+#endif
+
+ /* Decompression state */
+ int compression_type;
+ /* Note: ZSTD_DCtx is not stored - created/freed during decompression */
+ uint8_t *decompressed_data;
+ size_t decompressed_size;
+
+ /* Header metadata */
+ uint8_t py_major;
+ uint8_t py_minor;
+ uint8_t py_micro;
+ int needs_swap; /* Non-zero if file was written on different-endian system */
+ uint64_t start_time_us;
+ uint64_t sample_interval_us;
+ uint32_t sample_count;
+ uint32_t thread_count;
+ uint64_t string_table_offset;
+ uint64_t frame_table_offset;
+
+ /* Parsed string table: array of Python string objects */
+ PyObject **strings;
+ uint32_t strings_count;
+
+ /* Parsed frame table: packed as [filename_idx, funcname_idx, lineno] */
+ uint32_t *frame_data;
+ uint32_t frames_count;
+
+ /* Sample data region */
+ uint8_t *sample_data;
+ size_t sample_data_size;
+
+ /* Per-thread state for stack reconstruction (used during replay) */
+ ReaderThreadState *thread_states;
+ size_t thread_state_count;
+ size_t thread_state_capacity;
+
+ /* Statistics */
+ BinaryReaderStats stats;
+} BinaryReader;
+
+/* ============================================================================
+ * VARINT ENCODING/DECODING (INLINE FOR PERFORMANCE)
+ * ============================================================================ */
+
+/* Encode unsigned 64-bit varint (LEB128). Returns bytes written. */
+static inline size_t
+encode_varint_u64(uint8_t *buf, uint64_t value)
+{
+    /* Single-byte fast path: values 0-127 dominate in practice. */
+    if (value < 0x80) {
+        buf[0] = (uint8_t)value;
+        return 1;
+    }
+
+    size_t n = 0;
+    do {
+        uint8_t byte = (uint8_t)(value & 0x7F);
+        value >>= 7;
+        /* Continuation bit (0x80) on every byte except the last. */
+        buf[n++] = value ? (uint8_t)(byte | 0x80) : byte;
+    } while (value);
+    return n;
+}
+
+/* Encode unsigned 32-bit varint. Returns bytes written.
+ * A uint32 widened to uint64 yields the identical LEB128 byte sequence,
+ * so this simply delegates to the 64-bit encoder (at most 5 bytes out). */
+static inline size_t
+encode_varint_u32(uint8_t *buf, uint32_t value)
+{
+    return encode_varint_u64(buf, value);
+}
+
+/* Encode signed 32-bit varint (zigzag encoding). Returns bytes written. */
+static inline size_t
+encode_varint_i32(uint8_t *buf, int32_t value)
+{
+    /* Zigzag encode: map signed to unsigned so small magnitudes stay small:
+     * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
+     * NOTE: (value >> 31) relies on arithmetic right shift of negative ints
+     * (implementation-defined in C, but universal on supported compilers). */
+    uint32_t zigzag = ((uint32_t)value << 1) ^ (uint32_t)(value >> 31);
+    return encode_varint_u32(buf, zigzag);
+}
+
+/* Decode unsigned 64-bit varint (LEB128). Updates offset only on success.
+ * On error (overflow or incomplete), offset is NOT updated, allowing callers
+ * to detect errors via (offset == prev_offset) check. Sets PyErr on error. */
+static inline uint64_t
+decode_varint_u64(const uint8_t *data, size_t *offset, size_t max_size)
+{
+    size_t pos = *offset;
+    uint64_t result = 0;
+    int shift = 0;
+
+    /* Fast path for single-byte varints (0-127) - most common case */
+    if (LIKELY(pos < max_size && (data[pos] & 0x80) == 0)) {
+        *offset = pos + 1;
+        return data[pos];
+    }
+
+    while (pos < max_size) {
+        uint8_t byte = data[pos++];
+        /* Accumulate 7 payload bits per byte, least-significant group first */
+        result |= (uint64_t)(byte & 0x7F) << shift;
+        if ((byte & 0x80) == 0) {
+            /* Continuation bit clear: final byte - commit position */
+            *offset = pos;
+            return result;
+        }
+        shift += 7;
+        /* A u64 needs at most 10 LEB128 bytes; reject longer encodings
+         * (shift reaches 70 after the 10th continuation byte). */
+        if (UNLIKELY(shift >= 64)) {
+            PyErr_SetString(PyExc_ValueError, "Invalid or incomplete varint in binary data");
+            return 0;
+        }
+    }
+
+    /* Ran off the end of the buffer mid-varint */
+    PyErr_SetString(PyExc_ValueError, "Invalid or incomplete varint in binary data");
+    return 0;
+}
+
+/* Decode unsigned 32-bit varint. If value exceeds UINT32_MAX, treats as error. */
+static inline uint32_t
+decode_varint_u32(const uint8_t *data, size_t *offset, size_t max_size)
+{
+    /* Remember the starting position: the out-of-range case below must
+     * restore it to preserve the "offset unchanged on error" contract. */
+    size_t saved_offset = *offset;
+    uint64_t value = decode_varint_u64(data, offset, max_size);
+    if (PyErr_Occurred()) {
+        return 0;
+    }
+    if (UNLIKELY(value > UINT32_MAX)) {
+        *offset = saved_offset;
+        PyErr_SetString(PyExc_ValueError, "Invalid or incomplete varint in binary data");
+        return 0;
+    }
+    return (uint32_t)value;
+}
+
+/* Decode signed 32-bit varint (zigzag encoding). */
+static inline int32_t
+decode_varint_i32(const uint8_t *data, size_t *offset, size_t max_size)
+{
+    uint32_t zigzag = decode_varint_u32(data, offset, max_size);
+    if (PyErr_Occurred()) {
+        return 0;
+    }
+    /* Undo zigzag: even values map back to non-negative, odd to negative.
+     * This is the inverse of the encoder's ((v << 1) ^ (v >> 31)) mapping. */
+    return (int32_t)((zigzag >> 1) ^ -(int32_t)(zigzag & 1));
+}
+
+/* ============================================================================
+ * SHARED UTILITY FUNCTIONS
+ * ============================================================================ */
+
+/* Generic array growth - returns new pointer or NULL (sets PyErr_NoMemory)
+ * Includes overflow checking for capacity doubling and allocation size. */
+static inline void *
+grow_array(void *ptr, size_t *capacity, size_t elem_size)
+{
+ size_t old_cap = *capacity;
+
+ /* Check for overflow when doubling capacity */
+ if (old_cap > SIZE_MAX / 2) {
+ PyErr_SetString(PyExc_OverflowError, "Array capacity overflow");
+ return NULL;
+ }
+ size_t new_cap = old_cap * 2;
+
+ /* Check for overflow when calculating allocation size */
+ if (new_cap > SIZE_MAX / elem_size) {
+ PyErr_SetString(PyExc_OverflowError, "Array allocation size overflow");
+ return NULL;
+ }
+
+ void *new_ptr = PyMem_Realloc(ptr, new_cap * elem_size);
+ if (new_ptr) {
+ *capacity = new_cap;
+ } else {
+ PyErr_NoMemory();
+ }
+ return new_ptr;
+}
+
+/* Grow the array at *ptr_addr in place once count has reached *capacity.
+ * Returns 0 on success (or when no growth was needed), -1 on failure with
+ * a Python error set; on failure the original array is left untouched. */
+static inline int
+grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_size)
+{
+    if (count < *capacity) {
+        return 0;
+    }
+    void *tmp = grow_array(*ptr_addr, capacity, elem_size);
+    if (tmp == NULL) {
+        return -1;
+    }
+    *ptr_addr = tmp;
+    return 0;
+}
+
+/* Convenience wrapper: grow `ptr` (an array of `type`) when `count` has
+ * reached `cap`. Evaluates to 0 on success, -1 on failure (error set). */
+#define GROW_ARRAY(ptr, count, cap, type) \
+    grow_array_inplace((void**)&(ptr), (count), &(cap), sizeof(type))
+
+/* ============================================================================
+ * BINARY WRITER API
+ * ============================================================================ */
+
+/*
+ * Create a new binary writer.
+ *
+ * Arguments:
+ * filename: Path to output file
+ * sample_interval_us: Sampling interval in microseconds
+ * compression_type: COMPRESSION_NONE or COMPRESSION_ZSTD
+ * start_time_us: Start timestamp in microseconds (from time.monotonic() * 1e6)
+ *
+ * Returns:
+ * New BinaryWriter* on success, NULL on failure (PyErr set)
+ */
+BinaryWriter *binary_writer_create(
+ const char *filename,
+ uint64_t sample_interval_us,
+ int compression_type,
+ uint64_t start_time_us
+);
+
+/*
+ * Write a sample to the binary file.
+ *
+ * Arguments:
+ * writer: Writer from binary_writer_create
+ * stack_frames: List of InterpreterInfo struct sequences
+ * timestamp_us: Current timestamp in microseconds (from time.monotonic() * 1e6)
+ *
+ * Returns:
+ * 0 on success, -1 on failure (PyErr set)
+ */
+int binary_writer_write_sample(
+ BinaryWriter *writer,
+ PyObject *stack_frames,
+ uint64_t timestamp_us
+);
+
+/*
+ * Finalize and close the binary file.
+ * Writes string/frame tables, footer, and updates header.
+ *
+ * Arguments:
+ * writer: Writer to finalize
+ *
+ * Returns:
+ * 0 on success, -1 on failure (PyErr set)
+ */
+int binary_writer_finalize(BinaryWriter *writer);
+
+/*
+ * Destroy a binary writer and free all resources.
+ * Safe to call even if writer is partially initialized.
+ *
+ * Arguments:
+ * writer: Writer to destroy (may be NULL)
+ */
+void binary_writer_destroy(BinaryWriter *writer);
+
+/* ============================================================================
+ * BINARY READER API
+ * ============================================================================ */
+
+/*
+ * Open a binary file for reading.
+ *
+ * Arguments:
+ * filename: Path to input file
+ *
+ * Returns:
+ * New BinaryReader* on success, NULL on failure (PyErr set)
+ */
+BinaryReader *binary_reader_open(const char *filename);
+
+/*
+ * Replay samples from binary file through a collector.
+ *
+ * Arguments:
+ * reader: Reader from binary_reader_open
+ * collector: Python collector with collect() method
+ * progress_callback: Optional callable(current, total) or NULL
+ *
+ * Returns:
+ * Number of samples replayed on success, -1 on failure (PyErr set)
+ */
+Py_ssize_t binary_reader_replay(
+ BinaryReader *reader,
+ PyObject *collector,
+ PyObject *progress_callback
+);
+
+/*
+ * Get metadata about the binary file.
+ *
+ * Arguments:
+ * reader: Reader from binary_reader_open
+ *
+ * Returns:
+ * Dict with file metadata on success, NULL on failure (PyErr set)
+ */
+PyObject *binary_reader_get_info(BinaryReader *reader);
+
+/*
+ * Close a binary reader and free all resources.
+ *
+ * Arguments:
+ * reader: Reader to close (may be NULL)
+ */
+void binary_reader_close(BinaryReader *reader);
+
+/* ============================================================================
+ * STATISTICS FUNCTIONS
+ * ============================================================================ */
+
+/*
+ * Get writer statistics as a Python dict.
+ *
+ * Arguments:
+ * writer: Writer to get stats from
+ *
+ * Returns:
+ * Dict with statistics on success, NULL on failure (PyErr set)
+ */
+PyObject *binary_writer_get_stats(BinaryWriter *writer);
+
+/*
+ * Get reader statistics as a Python dict.
+ *
+ * Arguments:
+ * reader: Reader to get stats from
+ *
+ * Returns:
+ * Dict with statistics on success, NULL on failure (PyErr set)
+ */
+PyObject *binary_reader_get_stats(BinaryReader *reader);
+
+/* ============================================================================
+ * UTILITY FUNCTIONS
+ * ============================================================================ */
+
+/*
+ * Check if zstd compression is available.
+ *
+ * Returns:
+ * 1 if zstd available, 0 otherwise
+ */
+int binary_io_zstd_available(void);
+
+/*
+ * Get the best available compression type.
+ *
+ * Returns:
+ * COMPRESSION_ZSTD if available, COMPRESSION_NONE otherwise
+ */
+int binary_io_get_best_compression(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* Py_BINARY_IO_H */
--- /dev/null
+/******************************************************************************
+ * Python Remote Debugging Module - Binary Reader Implementation
+ *
+ * High-performance binary file reader for profiling data with optional zstd
+ * decompression.
+ ******************************************************************************/
+
+#ifndef Py_BUILD_CORE_MODULE
+# define Py_BUILD_CORE_MODULE
+#endif
+
+#include "binary_io.h"
+#include "_remote_debugging.h"
+#include "pycore_bitutils.h" /* _Py_bswap32, _Py_bswap64 for cross-endian reading */
+#include <string.h>
+
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
+
+/* ============================================================================
+ * CONSTANTS FOR BINARY FORMAT SIZES
+ * ============================================================================ */
+
+/* File structure sizes */
+#define FILE_FOOTER_SIZE 32
+#define MIN_DECOMPRESS_BUFFER_SIZE (64 * 1024) /* Minimum decompression buffer */
+
+/* Progress callback frequency */
+#define PROGRESS_CALLBACK_INTERVAL 1000
+
+/* Maximum decompression size limit (1GB) */
+#define MAX_DECOMPRESS_SIZE (1ULL << 30)
+
+/* ============================================================================
+ * BINARY READER IMPLEMENTATION
+ * ============================================================================ */
+
+/* Parse and validate the fixed-size file header at the start of `data`.
+ * Detects byte order from the magic number, rejects unknown or newer
+ * format versions, and fills the metadata fields of `reader`.
+ * Returns 0 on success, -1 on failure with a Python exception set. */
+static inline int
+reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
+{
+    if (file_size < FILE_HEADER_PLACEHOLDER_SIZE) {
+        PyErr_SetString(PyExc_ValueError, "File too small for header");
+        return -1;
+    }
+
+    /* Use memcpy to avoid strict aliasing violations and unaligned access */
+    uint32_t magic;
+    uint32_t version;
+    memcpy(&magic, &data[0], sizeof(magic));
+    memcpy(&version, &data[4], sizeof(version));
+
+    /* Detect endianness from magic number */
+    if (magic == BINARY_FORMAT_MAGIC) {
+        reader->needs_swap = 0;
+    } else if (magic == BINARY_FORMAT_MAGIC_SWAPPED) {
+        /* File written on an opposite-endian machine: every multi-byte
+         * header field must be byte-swapped before use. */
+        reader->needs_swap = 1;
+        version = _Py_bswap32(version);
+    } else {
+        PyErr_Format(PyExc_ValueError, "Invalid magic number: 0x%08x", magic);
+        return -1;
+    }
+
+    if (version != BINARY_FORMAT_VERSION) {
+        if (version > BINARY_FORMAT_VERSION && file_size >= HDR_OFF_PY_MICRO + 1) {
+            /* Newer format - try to read Python version for better error */
+            uint8_t py_major = data[HDR_OFF_PY_MAJOR];
+            uint8_t py_minor = data[HDR_OFF_PY_MINOR];
+            uint8_t py_micro = data[HDR_OFF_PY_MICRO];
+            PyErr_Format(PyExc_ValueError,
+                "Binary file was created with Python %u.%u.%u (format version %u), "
+                "but this is Python %d.%d.%d (format version %d)",
+                py_major, py_minor, py_micro, version,
+                PY_MAJOR_VERSION, PY_MINOR_VERSION, PY_MICRO_VERSION,
+                BINARY_FORMAT_VERSION);
+        } else {
+            PyErr_Format(PyExc_ValueError,
+                "Unsupported format version %u (this reader supports version %d)",
+                version, BINARY_FORMAT_VERSION);
+        }
+        return -1;
+    }
+
+    /* Single-byte fields need no swapping regardless of endianness */
+    reader->py_major = data[HDR_OFF_PY_MAJOR];
+    reader->py_minor = data[HDR_OFF_PY_MINOR];
+    reader->py_micro = data[HDR_OFF_PY_MICRO];
+
+    /* Read header fields with byte-swapping if needed */
+    uint64_t start_time_us, sample_interval_us, string_table_offset, frame_table_offset;
+    uint32_t sample_count, thread_count, compression_type;
+
+    memcpy(&start_time_us, &data[HDR_OFF_START_TIME], HDR_SIZE_START_TIME);
+    memcpy(&sample_interval_us, &data[HDR_OFF_INTERVAL], HDR_SIZE_INTERVAL);
+    memcpy(&sample_count, &data[HDR_OFF_SAMPLES], HDR_SIZE_SAMPLES);
+    memcpy(&thread_count, &data[HDR_OFF_THREADS], HDR_SIZE_THREADS);
+    memcpy(&string_table_offset, &data[HDR_OFF_STR_TABLE], HDR_SIZE_STR_TABLE);
+    memcpy(&frame_table_offset, &data[HDR_OFF_FRAME_TABLE], HDR_SIZE_FRAME_TABLE);
+    memcpy(&compression_type, &data[HDR_OFF_COMPRESSION], HDR_SIZE_COMPRESSION);
+
+    reader->start_time_us = SWAP64_IF(reader->needs_swap, start_time_us);
+    reader->sample_interval_us = SWAP64_IF(reader->needs_swap, sample_interval_us);
+    reader->sample_count = SWAP32_IF(reader->needs_swap, sample_count);
+    reader->thread_count = SWAP32_IF(reader->needs_swap, thread_count);
+    reader->string_table_offset = SWAP64_IF(reader->needs_swap, string_table_offset);
+    reader->frame_table_offset = SWAP64_IF(reader->needs_swap, frame_table_offset);
+    reader->compression_type = (int)SWAP32_IF(reader->needs_swap, compression_type);
+
+    return 0;
+}
+
+/* Parse the fixed-size footer at the end of the file, extracting the string
+ * and frame table entry counts. Byte order follows reader->needs_swap, which
+ * reader_parse_header() must have established already.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static inline int
+reader_parse_footer(BinaryReader *reader, const uint8_t *data, size_t file_size)
+{
+    if (file_size < FILE_FOOTER_SIZE) {
+        PyErr_SetString(PyExc_ValueError, "File too small for footer");
+        return -1;
+    }
+
+    /* The footer occupies the last FILE_FOOTER_SIZE bytes; only the two
+     * leading 32-bit counts are consumed here. */
+    const uint8_t *tail = &data[file_size - FILE_FOOTER_SIZE];
+
+    /* memcpy keeps the loads alignment-safe and free of aliasing issues */
+    uint32_t raw_strings;
+    uint32_t raw_frames;
+    memcpy(&raw_strings, tail, sizeof(raw_strings));
+    memcpy(&raw_frames, tail + sizeof(uint32_t), sizeof(raw_frames));
+
+    reader->strings_count = SWAP32_IF(reader->needs_swap, raw_strings);
+    reader->frames_count = SWAP32_IF(reader->needs_swap, raw_frames);
+    return 0;
+}
+
+#ifdef HAVE_ZSTD
+/* Decompress the zstd-compressed sample region into a buffer owned by the
+ * reader.
+ *
+ * The compressed region spans from the end of the header placeholder up to
+ * the string table. On success, reader->sample_data / sample_data_size point
+ * at the decompressed bytes (reader->decompressed_data owns them). Growth is
+ * capped at MAX_DECOMPRESS_SIZE (defined above) to prevent memory exhaustion
+ * from malicious files.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static inline int
+reader_decompress_samples(BinaryReader *reader, const uint8_t *data)
+{
+    /* Validate the region before deriving sizes from it, so the unsigned
+     * subtraction below can never underflow, even transiently. */
+    if (reader->string_table_offset < FILE_HEADER_PLACEHOLDER_SIZE) {
+        PyErr_SetString(PyExc_ValueError, "Invalid string table offset");
+        return -1;
+    }
+    size_t compressed_size = reader->string_table_offset - FILE_HEADER_PLACEHOLDER_SIZE;
+    const uint8_t *compressed_data = data + FILE_HEADER_PLACEHOLDER_SIZE;
+
+    ZSTD_DCtx *dctx = ZSTD_createDCtx();
+    if (!dctx) {
+        PyErr_SetString(PyExc_MemoryError, "Failed to create zstd decompression context");
+        return -1;
+    }
+
+    /* Try to get exact decompressed size from frame header for optimal allocation */
+    unsigned long long frame_content_size = ZSTD_getFrameContentSize(compressed_data, compressed_size);
+    size_t alloc_size;
+
+    if (frame_content_size == ZSTD_CONTENTSIZE_ERROR) {
+        /* Corrupted frame header - fail early */
+        PyErr_SetString(PyExc_ValueError, "Corrupted zstd frame header");
+        goto fail;
+    }
+    else if (frame_content_size != ZSTD_CONTENTSIZE_UNKNOWN &&
+             frame_content_size <= SIZE_MAX &&
+             frame_content_size <= MAX_DECOMPRESS_SIZE) {
+        alloc_size = (size_t)frame_content_size;
+    }
+    else {
+        /* Unknown content size: start from a streaming-friendly default */
+        alloc_size = ZSTD_DStreamOutSize() * 4;
+        if (alloc_size < MIN_DECOMPRESS_BUFFER_SIZE) {
+            alloc_size = MIN_DECOMPRESS_BUFFER_SIZE;
+        }
+    }
+
+    reader->decompressed_data = PyMem_Malloc(alloc_size);
+    if (!reader->decompressed_data) {
+        PyErr_NoMemory();
+        goto fail;
+    }
+
+    ZSTD_inBuffer input = { compressed_data, compressed_size, 0 };
+    size_t total_output = 0;
+    size_t last_result = 0;
+
+    while (input.pos < input.size) {
+        if (total_output >= alloc_size) {
+            /* Buffer full: double it, guarding against size_t overflow and
+             * the hard MAX_DECOMPRESS_SIZE cap. */
+            if (alloc_size > SIZE_MAX / 2 || alloc_size * 2 > MAX_DECOMPRESS_SIZE) {
+                PyErr_SetString(PyExc_MemoryError, "Decompressed data exceeds maximum size");
+                goto fail;
+            }
+            size_t new_size = alloc_size * 2;
+            uint8_t *new_buf = PyMem_Realloc(reader->decompressed_data, new_size);
+            if (!new_buf) {
+                PyErr_NoMemory();
+                goto fail;
+            }
+            reader->decompressed_data = new_buf;
+            alloc_size = new_size;
+        }
+
+        ZSTD_outBuffer output = {
+            reader->decompressed_data + total_output,
+            alloc_size - total_output,
+            0
+        };
+
+        last_result = ZSTD_decompressStream(dctx, &output, &input);
+        if (ZSTD_isError(last_result)) {
+            PyErr_Format(PyExc_ValueError, "zstd decompression error: %s",
+                         ZSTD_getErrorName(last_result));
+            goto fail;
+        }
+
+        total_output += output.pos;
+    }
+
+    /* Verify decompression is complete (last_result == 0 means frame is complete) */
+    if (last_result != 0) {
+        PyErr_SetString(PyExc_ValueError, "Incomplete zstd frame: data may be truncated");
+        goto fail;
+    }
+
+    ZSTD_freeDCtx(dctx);
+    reader->decompressed_size = total_output;
+    reader->sample_data = reader->decompressed_data;
+    reader->sample_data_size = reader->decompressed_size;
+    return 0;
+
+fail:
+    /* PyMem_Free(NULL) is a no-op, so this is safe before allocation too */
+    PyMem_Free(reader->decompressed_data);
+    reader->decompressed_data = NULL;
+    ZSTD_freeDCtx(dctx);
+    return -1;
+}
+#endif
+
+/* Decode the string table into an array of Python str objects.
+ *
+ * Each entry is a varint length followed by that many UTF-8 bytes.
+ * Undecodable byte sequences are substituted (errors="replace") rather than
+ * rejected. Partially-populated state on failure is released by
+ * binary_reader_close().
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static inline int
+reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file_size)
+{
+    reader->strings = PyMem_Calloc(reader->strings_count, sizeof(PyObject *));
+    if (!reader->strings && reader->strings_count > 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    size_t offset = reader->string_table_offset;
+    for (uint32_t i = 0; i < reader->strings_count; i++) {
+        size_t prev_offset = offset;
+        uint32_t str_len = decode_varint_u32(data, &offset, file_size);
+        /* An unchanged offset signals a truncated/malformed varint */
+        if (offset == prev_offset) {
+            PyErr_SetString(PyExc_ValueError, "Malformed varint in string table");
+            return -1;
+        }
+        /* Overflow-safe bounds check: "offset + str_len > file_size" could
+         * wrap on 32-bit platforms, so compare against the remaining bytes
+         * instead. */
+        if (offset > file_size || str_len > file_size - offset) {
+            PyErr_SetString(PyExc_ValueError, "String table overflow");
+            return -1;
+        }
+
+        reader->strings[i] = PyUnicode_DecodeUTF8((char *)&data[offset], str_len, "replace");
+        if (!reader->strings[i]) {
+            return -1;
+        }
+        offset += str_len;
+    }
+
+    return 0;
+}
+
+/* Decode the frame table into reader->frame_data, a flat array holding
+ * three consecutive values per frame: filename string index, funcname
+ * string index, and a signed line number (stored as uint32_t).
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static inline int
+reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_size)
+{
+    /* Guard the multiplication in the allocation size. Only 32-bit builds
+       can overflow SIZE_MAX with a uint32_t count times 12 bytes/record. */
+#if SIZEOF_SIZE_T < 8
+    if (reader->frames_count > SIZE_MAX / (3 * sizeof(uint32_t))) {
+        PyErr_SetString(PyExc_OverflowError, "Frame count too large for allocation");
+        return -1;
+    }
+#endif
+
+    size_t alloc_size = (size_t)reader->frames_count * 3 * sizeof(uint32_t);
+    reader->frame_data = PyMem_Malloc(alloc_size);
+    if (!reader->frame_data && reader->frames_count > 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* One diagnostic per record field, indexed by field position */
+    static const char *const malformed_msgs[3] = {
+        "Malformed varint in frame table (filename)",
+        "Malformed varint in frame table (funcname)",
+        "Malformed varint in frame table (lineno)",
+    };
+
+    size_t offset = reader->frame_table_offset;
+    for (uint32_t i = 0; i < reader->frames_count; i++) {
+        size_t base = (size_t)i * 3;
+        for (int field = 0; field < 3; field++) {
+            size_t before = offset;
+            uint32_t value;
+            if (field == 2) {
+                /* The line number is a signed varint */
+                value = (uint32_t)decode_varint_i32(data, &offset, file_size);
+            }
+            else {
+                value = decode_varint_u32(data, &offset, file_size);
+            }
+            /* An unchanged offset signals a truncated/malformed varint */
+            if (offset == before) {
+                PyErr_SetString(PyExc_ValueError, malformed_msgs[field]);
+                return -1;
+            }
+            reader->frame_data[base + field] = value;
+        }
+    }
+
+    return 0;
+}
+
+/* Open a binary profile file and parse its metadata.
+ *
+ * Maps the file with mmap() on POSIX builds (USE_MMAP) or reads it fully
+ * into memory with stdio elsewhere, then parses the header and footer,
+ * validates the table offsets, decompresses the sample region if needed,
+ * and loads the string and frame tables.
+ *
+ * Returns a heap-allocated reader, or NULL with a Python exception set;
+ * on failure all partial state is released via binary_reader_close(). */
+BinaryReader *
+binary_reader_open(const char *filename)
+{
+    BinaryReader *reader = PyMem_Calloc(1, sizeof(BinaryReader));
+    if (!reader) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+#if USE_MMAP
+    reader->fd = -1; /* Explicit initialization for cleanup safety */
+#endif
+
+    /* Keep a private copy of the filename for diagnostics/cleanup */
+    reader->filename = PyMem_Malloc(strlen(filename) + 1);
+    if (!reader->filename) {
+        PyMem_Free(reader);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    strcpy(reader->filename, filename);
+
+#if USE_MMAP
+    /* Open with mmap on Unix */
+    reader->fd = open(filename, O_RDONLY);
+    if (reader->fd < 0) {
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+        goto error;
+    }
+
+    struct stat st;
+    if (fstat(reader->fd, &st) < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+    reader->mapped_size = st.st_size;
+
+    /* Map the file into memory.
+     * MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
+     * - Catches issues (e.g., file truncation) immediately rather than as SIGBUS during reads
+     * - Eliminates page faults during subsequent reads for better performance
+     */
+#ifdef __linux__
+    reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ,
+                               MAP_PRIVATE | MAP_POPULATE, reader->fd, 0);
+#else
+    reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ,
+                               MAP_PRIVATE, reader->fd, 0);
+#endif
+    if (reader->mapped_data == MAP_FAILED) {
+        reader->mapped_data = NULL;
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+
+    /* Hint sequential access pattern - failures are non-fatal */
+    (void)madvise(reader->mapped_data, reader->mapped_size, MADV_SEQUENTIAL);
+
+    /* Pre-fetch pages into memory - failures are non-fatal.
+     * Complements MAP_POPULATE on Linux, provides benefit on macOS. */
+    (void)madvise(reader->mapped_data, reader->mapped_size, MADV_WILLNEED);
+
+    /* Use transparent huge pages for large files to reduce TLB misses.
+     * Only beneficial for files >= 32MB where TLB pressure matters. */
+#ifdef MADV_HUGEPAGE
+    if (reader->mapped_size >= (32 * 1024 * 1024)) {
+        (void)madvise(reader->mapped_data, reader->mapped_size, MADV_HUGEPAGE);
+    }
+#endif
+
+    /* Add file descriptor-level hints for better kernel I/O scheduling */
+#if defined(__linux__) && defined(POSIX_FADV_SEQUENTIAL)
+    (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+    if (reader->mapped_size > (64 * 1024 * 1024)) {
+        (void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_WILLNEED);
+    }
+#endif
+
+    uint8_t *data = reader->mapped_data;
+    size_t file_size = reader->mapped_size;
+#else
+    /* Use stdio on Windows */
+    reader->fp = fopen(filename, "rb");
+    if (!reader->fp) {
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+        goto error;
+    }
+
+    /* Determine the file size by seeking to the end */
+    if (FSEEK64(reader->fp, 0, SEEK_END) != 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+    file_offset_t file_size_off = FTELL64(reader->fp);
+    if (file_size_off < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+    reader->file_size = (size_t)file_size_off;
+    if (FSEEK64(reader->fp, 0, SEEK_SET) != 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+
+    /* Read the whole file into one buffer */
+    reader->file_data = PyMem_Malloc(reader->file_size);
+    if (!reader->file_data) {
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    if (fread(reader->file_data, 1, reader->file_size, reader->fp) != reader->file_size) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
+
+    uint8_t *data = reader->file_data;
+    size_t file_size = reader->file_size;
+#endif
+
+    /* Parse header and footer */
+    if (reader_parse_header(reader, data, file_size) < 0) {
+        goto error;
+    }
+    if (reader_parse_footer(reader, data, file_size) < 0) {
+        goto error;
+    }
+
+    /* Validate table offsets are within file bounds */
+    if (reader->string_table_offset > file_size) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid string table offset: %llu exceeds file size %zu",
+            (unsigned long long)reader->string_table_offset, file_size);
+        goto error;
+    }
+    if (reader->frame_table_offset > file_size) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid frame table offset: %llu exceeds file size %zu",
+            (unsigned long long)reader->frame_table_offset, file_size);
+        goto error;
+    }
+    if (reader->string_table_offset < FILE_HEADER_PLACEHOLDER_SIZE) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid string table offset: %llu is before data section",
+            (unsigned long long)reader->string_table_offset);
+        goto error;
+    }
+    if (reader->frame_table_offset < FILE_HEADER_PLACEHOLDER_SIZE) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid frame table offset: %llu is before data section",
+            (unsigned long long)reader->frame_table_offset);
+        goto error;
+    }
+    if (reader->string_table_offset > reader->frame_table_offset) {
+        PyErr_Format(PyExc_ValueError,
+            "Invalid table offsets: string table (%llu) is after frame table (%llu)",
+            (unsigned long long)reader->string_table_offset,
+            (unsigned long long)reader->frame_table_offset);
+        goto error;
+    }
+
+    /* Handle compressed data */
+    if (reader->compression_type == COMPRESSION_ZSTD) {
+#ifdef HAVE_ZSTD
+        if (reader_decompress_samples(reader, data) < 0) {
+            goto error;
+        }
+#else
+        PyErr_SetString(PyExc_RuntimeError,
+            "File uses zstd compression but zstd support not compiled in");
+        goto error;
+#endif
+    } else {
+        /* Uncompressed: samples live directly in the mapped/read buffer */
+        reader->sample_data = data + FILE_HEADER_PLACEHOLDER_SIZE;
+        reader->sample_data_size = reader->string_table_offset - FILE_HEADER_PLACEHOLDER_SIZE;
+    }
+
+    /* Parse string and frame tables */
+    if (reader_parse_string_table(reader, data, file_size) < 0) {
+        goto error;
+    }
+    if (reader_parse_frame_table(reader, data, file_size) < 0) {
+        goto error;
+    }
+
+    return reader;
+
+error:
+    binary_reader_close(reader);
+    return NULL;
+}
+
+/* Get or create reader thread state for stack reconstruction.
+ *
+ * Thread states are keyed by (thread_id, interpreter_id) and located with a
+ * linear scan, which assumes the number of distinct threads in a profile is
+ * small. New entries start with prev_timestamp at the profile start time
+ * and a reconstruction stack of MAX_STACK_DEPTH slots.
+ *
+ * NOTE(review): the returned pointer aliases reader->thread_states and may
+ * be invalidated when a later call grows the array - callers must not hold
+ * it across calls. Also assumes grow_array() does not leak the old block on
+ * failure - TODO confirm.
+ *
+ * Returns a pointer into reader->thread_states, or NULL with a Python
+ * exception set on allocation failure. */
+static ReaderThreadState *
+reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id,
+                                  uint32_t interpreter_id)
+{
+    /* Search existing threads (key is thread_id + interpreter_id) */
+    for (size_t i = 0; i < reader->thread_state_count; i++) {
+        if (reader->thread_states[i].thread_id == thread_id &&
+            reader->thread_states[i].interpreter_id == interpreter_id) {
+            return &reader->thread_states[i];
+        }
+    }
+
+    /* Not found: allocate (first use) or grow the array, then append */
+    if (!reader->thread_states) {
+        reader->thread_state_capacity = 16;
+        reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));
+        if (!reader->thread_states) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+    } else if (reader->thread_state_count >= reader->thread_state_capacity) {
+        reader->thread_states = grow_array(reader->thread_states,
+                                           &reader->thread_state_capacity,
+                                           sizeof(ReaderThreadState));
+        if (!reader->thread_states) {
+            return NULL;
+        }
+    }
+
+    ReaderThreadState *ts = &reader->thread_states[reader->thread_state_count++];
+    memset(ts, 0, sizeof(ReaderThreadState));
+    ts->thread_id = thread_id;
+    ts->interpreter_id = interpreter_id;
+    /* Sample timestamps are stored as deltas; seed with the profile start */
+    ts->prev_timestamp = reader->start_time_us;
+    ts->current_stack_capacity = MAX_STACK_DEPTH;
+    ts->current_stack = PyMem_Malloc(ts->current_stack_capacity * sizeof(uint32_t));
+    if (!ts->current_stack) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    return ts;
+}
+
+/* ============================================================================
+ * STACK DECODING HELPERS
+ * ============================================================================ */
+
+/* Decode a full stack from sample data.
+ * Updates ts->current_stack and ts->current_stack_depth.
+ * Returns 0 on success, -1 on error (bounds violation or truncation). */
+static inline int
+decode_stack_full(ReaderThreadState *ts, const uint8_t *data,
+                  size_t *offset, size_t max_size)
+{
+    size_t prev_offset = *offset;
+    uint32_t depth = decode_varint_u32(data, offset, max_size);
+    /* An unchanged offset signals a truncated/malformed varint (same idiom
+     * as the other decoders in this file). */
+    if (*offset == prev_offset) {
+        PyErr_SetString(PyExc_ValueError, "Malformed varint in full stack");
+        return -1;
+    }
+
+    /* Validate depth against capacity to prevent buffer overflow */
+    if (depth > ts->current_stack_capacity) {
+        PyErr_Format(PyExc_ValueError,
+            "Stack depth %u exceeds capacity %zu", depth, ts->current_stack_capacity);
+        return -1;
+    }
+
+    ts->current_stack_depth = depth;
+    for (uint32_t i = 0; i < depth; i++) {
+        prev_offset = *offset;
+        ts->current_stack[i] = decode_varint_u32(data, offset, max_size);
+        /* Without this check a truncated file would silently fill the stack
+         * with repeated garbage values instead of raising an error. */
+        if (*offset == prev_offset) {
+            PyErr_SetString(PyExc_ValueError, "Malformed varint in full stack");
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Decode a suffix-encoded stack from sample data.
+ *
+ * The suffix encoding reuses the bottom `shared` frames of the previous
+ * stack and prepends `new_count` freshly-decoded frames. Stacks are stored
+ * innermost-frame-first (index 0 is the top of the stack), so new frames
+ * are written at the front and the shared tail is shifted up behind them.
+ * Returns 0 on success, -1 on error (bounds violation). */
+static inline int
+decode_stack_suffix(ReaderThreadState *ts, const uint8_t *data,
+                    size_t *offset, size_t max_size)
+{
+    uint32_t shared = decode_varint_u32(data, offset, max_size);
+    uint32_t new_count = decode_varint_u32(data, offset, max_size);
+
+    /* Validate shared doesn't exceed current stack depth */
+    if (shared > ts->current_stack_depth) {
+        PyErr_Format(PyExc_ValueError,
+            "Shared count %u exceeds current stack depth %zu",
+            shared, ts->current_stack_depth);
+        return -1;
+    }
+
+    /* Validate final depth doesn't exceed capacity; size_t math avoids
+     * 32-bit wrap on the sum. */
+    size_t final_depth = (size_t)shared + new_count;
+    if (final_depth > ts->current_stack_capacity) {
+        PyErr_Format(PyExc_ValueError,
+            "Final stack depth %zu exceeds capacity %zu",
+            final_depth, ts->current_stack_capacity);
+        return -1;
+    }
+
+    /* Move shared frames (from bottom of stack) to make room for new frames at the top */
+    if (new_count > 0 && shared > 0) {
+        /* Defensive check: ensure subtraction won't underflow.
+         * This should already be guaranteed by the check above, but we add
+         * this assertion as defense-in-depth against stack corruption. */
+        if (ts->current_stack_depth < shared) {
+            PyErr_SetString(PyExc_ValueError,
+                "Internal error: stack corruption detected in suffix decoding");
+            return -1;
+        }
+        /* memmove, not memcpy: source and destination ranges may overlap */
+        size_t prev_shared_start = ts->current_stack_depth - shared;
+        memmove(&ts->current_stack[new_count],
+                &ts->current_stack[prev_shared_start],
+                shared * sizeof(uint32_t));
+    }
+
+    for (uint32_t i = 0; i < new_count; i++) {
+        ts->current_stack[i] = decode_varint_u32(data, offset, max_size);
+    }
+    ts->current_stack_depth = final_depth;
+    return 0;
+}
+
+/* Decode a pop-push encoded stack from sample data.
+ *
+ * Encodes a stack as a delta from the previous one: `pop` frames are
+ * removed from the top and `push` new frames are decoded in their place.
+ * Stacks are stored innermost-frame-first (index 0 is the top of the
+ * stack), so popping discards the first `pop` entries and the pushed
+ * frames are written at the front.
+ * Returns 0 on success, -1 on error (bounds violation). */
+static inline int
+decode_stack_pop_push(ReaderThreadState *ts, const uint8_t *data,
+                      size_t *offset, size_t max_size)
+{
+    uint32_t pop = decode_varint_u32(data, offset, max_size);
+    uint32_t push = decode_varint_u32(data, offset, max_size);
+    /* Clamp to zero rather than underflow when pop exceeds current depth */
+    size_t keep = (ts->current_stack_depth > pop) ? ts->current_stack_depth - pop : 0;
+
+    /* Validate final depth doesn't exceed capacity */
+    size_t final_depth = keep + push;
+    if (final_depth > ts->current_stack_capacity) {
+        PyErr_Format(PyExc_ValueError,
+            "Final stack depth %zu exceeds capacity %zu",
+            final_depth, ts->current_stack_capacity);
+        return -1;
+    }
+
+    /* Move kept frames (from bottom of stack) to make room for new frames at the top.
+     * Even when push == 0, we need to move kept frames to index 0 if pop > 0. */
+    if (keep > 0) {
+        memmove(&ts->current_stack[push],
+                &ts->current_stack[pop],
+                keep * sizeof(uint32_t));
+    }
+
+    for (uint32_t i = 0; i < push; i++) {
+        ts->current_stack[i] = decode_varint_u32(data, offset, max_size);
+    }
+    ts->current_stack_depth = final_depth;
+    return 0;
+}
+
+/* Build a Python list of FrameInfo objects from frame indices.
+ *
+ * Each frame index selects a (filename, funcname, lineno) triple from the
+ * frame table; string indices are resolved through the string table.
+ * Returns a new list reference, or NULL with a Python exception set. */
+static PyObject *
+build_frame_list(RemoteDebuggingState *state, BinaryReader *reader,
+                 const uint32_t *frame_indices, size_t stack_depth)
+{
+    PyObject *frame_list = PyList_New(stack_depth);
+    if (!frame_list) {
+        return NULL;
+    }
+
+    for (size_t k = 0; k < stack_depth; k++) {
+        uint32_t frame_idx = frame_indices[k];
+        if (frame_idx >= reader->frames_count) {
+            PyErr_Format(PyExc_ValueError, "Invalid frame index: %u", frame_idx);
+            goto error;
+        }
+
+        /* Widen before multiplying: frame_idx * 3 in 32-bit arithmetic can
+         * wrap for very large frame tables (matches reader_parse_frame_table,
+         * which uses (size_t)i * 3). */
+        size_t base = (size_t)frame_idx * 3;
+        uint32_t filename_idx = reader->frame_data[base];
+        uint32_t funcname_idx = reader->frame_data[base + 1];
+        int32_t lineno = (int32_t)reader->frame_data[base + 2];
+
+        if (filename_idx >= reader->strings_count ||
+            funcname_idx >= reader->strings_count) {
+            PyErr_SetString(PyExc_ValueError, "Invalid string index in frame");
+            goto error;
+        }
+
+        PyObject *frame_info = PyStructSequence_New(state->FrameInfo_Type);
+        if (!frame_info) {
+            goto error;
+        }
+
+        /* Non-positive line numbers get None instead of a location tuple */
+        PyObject *location;
+        if (lineno > 0) {
+            location = Py_BuildValue("(iiii)", lineno, lineno, 0, 0);
+            if (!location) {
+                Py_DECREF(frame_info);
+                goto error;
+            }
+        }
+        else {
+            location = Py_NewRef(Py_None);
+        }
+
+        /* PyStructSequence_SetItem and PyList_SET_ITEM steal the references */
+        PyStructSequence_SetItem(frame_info, 0, Py_NewRef(reader->strings[filename_idx]));
+        PyStructSequence_SetItem(frame_info, 1, location);
+        PyStructSequence_SetItem(frame_info, 2, Py_NewRef(reader->strings[funcname_idx]));
+        PyStructSequence_SetItem(frame_info, 3, Py_NewRef(Py_None));
+        PyList_SET_ITEM(frame_list, k, frame_info);
+    }
+
+    return frame_list;
+
+error:
+    Py_DECREF(frame_list);
+    return NULL;
+}
+
+/* Build the nested sample structure expected by collectors:
+ *   [InterpreterInfo(interpreter_id,
+ *       [ThreadInfo(thread_id, status, [FrameInfo, ...])])]
+ *
+ * PyStructSequence_SetItem and PyList_SET_ITEM steal references, so each
+ * local is set to NULL as soon as ownership transfers into the enclosing
+ * object; the error path then only releases what is still owned.
+ * Returns a new list reference, or NULL with a Python exception set. */
+static PyObject *
+build_sample_list(RemoteDebuggingState *state, BinaryReader *reader,
+                  uint64_t thread_id, uint32_t interpreter_id, uint8_t status,
+                  const uint32_t *frame_indices, size_t stack_depth)
+{
+    PyObject *frame_list = NULL, *thread_info = NULL, *thread_list = NULL;
+    PyObject *interp_info = NULL, *sample_list = NULL;
+
+    frame_list = build_frame_list(state, reader, frame_indices, stack_depth);
+    if (!frame_list) {
+        goto error;
+    }
+
+    thread_info = PyStructSequence_New(state->ThreadInfo_Type);
+    if (!thread_info) {
+        goto error;
+    }
+    PyObject *tid = PyLong_FromUnsignedLongLong(thread_id);
+    if (!tid) {
+        goto error;
+    }
+    PyObject *st = PyLong_FromLong(status);
+    if (!st) {
+        Py_DECREF(tid);
+        goto error;
+    }
+    PyStructSequence_SetItem(thread_info, 0, tid);
+    PyStructSequence_SetItem(thread_info, 1, st);
+    PyStructSequence_SetItem(thread_info, 2, frame_list);
+    frame_list = NULL; /* ownership transferred */
+
+    thread_list = PyList_New(1);
+    if (!thread_list) {
+        goto error;
+    }
+    PyList_SET_ITEM(thread_list, 0, thread_info);
+    thread_info = NULL; /* ownership transferred */
+
+    interp_info = PyStructSequence_New(state->InterpreterInfo_Type);
+    if (!interp_info) {
+        goto error;
+    }
+    PyObject *iid = PyLong_FromUnsignedLong(interpreter_id);
+    if (!iid) {
+        goto error;
+    }
+    PyStructSequence_SetItem(interp_info, 0, iid);
+    PyStructSequence_SetItem(interp_info, 1, thread_list);
+    thread_list = NULL; /* ownership transferred */
+
+    sample_list = PyList_New(1);
+    if (!sample_list) {
+        goto error;
+    }
+    PyList_SET_ITEM(sample_list, 0, interp_info);
+    return sample_list;
+
+error:
+    Py_XDECREF(sample_list);
+    Py_XDECREF(interp_info);
+    Py_XDECREF(thread_list);
+    Py_XDECREF(thread_info);
+    Py_XDECREF(frame_list);
+    return NULL;
+}
+
+/* Build the nested sample structure and pass it to collector.collect()
+ * together with the (borrowed) timestamps_list.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static int
+emit_sample(RemoteDebuggingState *state, PyObject *collector,
+            uint64_t thread_id, uint32_t interpreter_id, uint8_t status,
+            const uint32_t *frame_indices, size_t stack_depth,
+            BinaryReader *reader, PyObject *timestamps_list)
+{
+    int rc = -1;
+    PyObject *samples = build_sample_list(state, reader, thread_id,
+                                          interpreter_id, status,
+                                          frame_indices, stack_depth);
+    if (samples != NULL) {
+        PyObject *res = PyObject_CallMethod(collector, "collect", "OO",
+                                            samples, timestamps_list);
+        Py_DECREF(samples);
+        if (res != NULL) {
+            Py_DECREF(res);
+            rc = 0;
+        }
+    }
+    return rc;
+}
+
+/* Trim the pre-allocated timestamp batch down to its populated length and
+ * hand it to the collector as a single sample.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static int
+emit_batch(RemoteDebuggingState *state, PyObject *collector,
+           uint64_t thread_id, uint32_t interpreter_id, uint8_t status,
+           const uint32_t *frame_indices, size_t stack_depth,
+           BinaryReader *reader, PyObject *timestamps_list, Py_ssize_t actual_size)
+{
+    /* Drop the unused tail slots left over from pre-allocation */
+    Py_ssize_t full_size = PyList_GET_SIZE(timestamps_list);
+    if (PyList_SetSlice(timestamps_list, actual_size, full_size, NULL) < 0) {
+        return -1;
+    }
+    return emit_sample(state, collector, thread_id, interpreter_id, status,
+                       frame_indices, stack_depth, reader, timestamps_list);
+}
+
+/* Invoke the optional progress callback with (current, total).
+ * A NULL or Py_None callback is a silent no-op.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static inline int
+invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
+{
+    if (callback == NULL || callback == Py_None) {
+        return 0;
+    }
+    PyObject *res = PyObject_CallFunction(callback, "nI", current, total);
+    if (res == NULL) {
+        return -1;
+    }
+    Py_DECREF(res);
+    return 0;
+}
+
+/* Replay every sample in the file into `collector`.
+ *
+ * Decodes the sample stream sequentially, reconstructing per-thread stacks
+ * incrementally (full / suffix / pop-push encodings) and expanding RLE
+ * repeat records, calling collector.collect(sample_list, timestamps) for
+ * each emitted sample or batch. progress_callback(current, total) - or
+ * None to disable - is invoked at the start, periodically during replay,
+ * and once at the end.
+ * Returns the number of samples replayed, or -1 with an exception set. */
+Py_ssize_t
+binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progress_callback)
+{
+    if (!PyObject_HasAttrString(collector, "collect")) {
+        PyErr_SetString(PyExc_TypeError, "Collector must have a collect() method");
+        return -1;
+    }
+
+    /* Get module state for struct sequence types */
+    PyObject *module = PyImport_ImportModule("_remote_debugging");
+    if (!module) {
+        return -1;
+    }
+    RemoteDebuggingState *state = RemoteDebugging_GetState(module);
+    Py_DECREF(module);
+
+    if (!state) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to get module state");
+        return -1;
+    }
+
+    size_t offset = 0;
+    Py_ssize_t replayed = 0;
+
+    /* Initial progress callback at 0% */
+    if (invoke_progress_callback(progress_callback, 0, reader->sample_count) < 0) {
+        return -1;
+    }
+
+    while (offset < reader->sample_data_size) {
+        /* Each record needs at least thread_id (8 bytes) + interpreter_id
+         * (4 bytes) + encoding byte (1 byte); a shorter tail means EOF. */
+        if (offset + 13 > reader->sample_data_size) {
+            break; /* End of data */
+        }
+
+        /* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
+        uint64_t thread_id_raw;
+        uint32_t interpreter_id_raw;
+        memcpy(&thread_id_raw, &reader->sample_data[offset], sizeof(thread_id_raw));
+        offset += 8;
+
+        memcpy(&interpreter_id_raw, &reader->sample_data[offset], sizeof(interpreter_id_raw));
+        offset += 4;
+
+        uint64_t thread_id = SWAP64_IF(reader->needs_swap, thread_id_raw);
+        uint32_t interpreter_id = SWAP32_IF(reader->needs_swap, interpreter_id_raw);
+
+        /* Get or create thread state for reconstruction */
+        ReaderThreadState *ts = reader_get_or_create_thread_state(reader, thread_id, interpreter_id);
+        if (!ts) {
+            return -1;
+        }
+
+        /* Read encoding byte */
+        uint8_t encoding = reader->sample_data[offset++];
+
+        switch (encoding) {
+        case STACK_REPEAT: {
+            /* RLE repeat: [count: varint] [delta: varint, status: 1]...
+             * The current stack is re-emitted `count` times with new
+             * timestamps/statuses. */
+            size_t prev_offset = offset;
+            uint32_t count = decode_varint_u32(reader->sample_data, &offset, reader->sample_data_size);
+            /* Detect varint decode failure */
+            if (offset == prev_offset) {
+                PyErr_SetString(PyExc_ValueError, "Malformed varint for RLE count");
+                return -1;
+            }
+
+            /* Validate RLE count to prevent DoS from malicious files.
+             * Each RLE sample needs at least 2 bytes (1 byte min varint + 1 status byte).
+             * Also reject absurdly large counts that would exhaust memory. */
+            size_t remaining_data = reader->sample_data_size - offset;
+            size_t max_possible_samples = remaining_data / 2;
+            if (count > max_possible_samples) {
+                PyErr_Format(PyExc_ValueError,
+                    "Invalid RLE count %u exceeds maximum possible %zu for remaining data",
+                    count, max_possible_samples);
+                return -1;
+            }
+
+            reader->stats.repeat_records++;
+            reader->stats.repeat_samples += count;
+
+            /* Process RLE samples, batching consecutive samples that share
+             * the same thread status into a single collect() call. */
+            PyObject *timestamps_list = NULL;
+            uint8_t batch_status = 0;
+            Py_ssize_t batch_idx = 0;
+
+            for (uint32_t i = 0; i < count; i++) {
+                size_t delta_prev_offset = offset;
+                uint64_t delta = decode_varint_u64(reader->sample_data, &offset, reader->sample_data_size);
+                if (offset == delta_prev_offset) {
+                    Py_XDECREF(timestamps_list);
+                    PyErr_SetString(PyExc_ValueError, "Malformed varint in RLE sample data");
+                    return -1;
+                }
+                if (offset >= reader->sample_data_size) {
+                    Py_XDECREF(timestamps_list);
+                    PyErr_SetString(PyExc_ValueError, "Unexpected end of sample data in RLE");
+                    return -1;
+                }
+                uint8_t status = reader->sample_data[offset++];
+                /* Timestamps are delta-encoded per thread */
+                ts->prev_timestamp += delta;
+
+                /* Start new batch on first sample or status change */
+                if (i == 0 || status != batch_status) {
+                    if (timestamps_list) {
+                        int rc = emit_batch(state, collector, thread_id, interpreter_id,
+                                            batch_status, ts->current_stack, ts->current_stack_depth,
+                                            reader, timestamps_list, batch_idx);
+                        Py_DECREF(timestamps_list);
+                        if (rc < 0) {
+                            return -1;
+                        }
+                    }
+                    /* Pre-size for the worst case; emit_batch trims later */
+                    timestamps_list = PyList_New(count - i);
+                    if (!timestamps_list) {
+                        return -1;
+                    }
+                    batch_status = status;
+                    batch_idx = 0;
+                }
+
+                PyObject *ts_obj = PyLong_FromUnsignedLongLong(ts->prev_timestamp);
+                if (!ts_obj) {
+                    Py_DECREF(timestamps_list);
+                    return -1;
+                }
+                PyList_SET_ITEM(timestamps_list, batch_idx++, ts_obj);
+            }
+
+            /* Emit final batch */
+            if (timestamps_list) {
+                int rc = emit_batch(state, collector, thread_id, interpreter_id,
+                                    batch_status, ts->current_stack, ts->current_stack_depth,
+                                    reader, timestamps_list, batch_idx);
+                Py_DECREF(timestamps_list);
+                if (rc < 0) {
+                    return -1;
+                }
+            }
+
+            replayed += count;
+            reader->stats.total_samples += count;
+
+            /* Progress callback after batch: fires if the batch crossed a
+             * PROGRESS_CALLBACK_INTERVAL boundary */
+            if (replayed % PROGRESS_CALLBACK_INTERVAL < count) {
+                if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
+                    return -1;
+                }
+            }
+            break;
+        }
+
+        case STACK_FULL:
+        case STACK_SUFFIX:
+        case STACK_POP_PUSH: {
+            /* All three encodings share: [delta: varint] [status: 1] ... */
+            size_t prev_offset = offset;
+            uint64_t delta = decode_varint_u64(reader->sample_data, &offset, reader->sample_data_size);
+            /* Detect varint decode failure: offset unchanged means error */
+            if (offset == prev_offset) {
+                PyErr_SetString(PyExc_ValueError, "Malformed varint in sample data");
+                return -1;
+            }
+            if (offset >= reader->sample_data_size) {
+                PyErr_SetString(PyExc_ValueError, "Unexpected end of sample data");
+                return -1;
+            }
+            uint8_t status = reader->sample_data[offset++];
+            /* Timestamps are delta-encoded per thread */
+            ts->prev_timestamp += delta;
+
+            /* Reconstruct the thread's stack according to the encoding */
+            if (encoding == STACK_FULL) {
+                if (decode_stack_full(ts, reader->sample_data, &offset, reader->sample_data_size) < 0) {
+                    return -1;
+                }
+                reader->stats.full_records++;
+            } else if (encoding == STACK_SUFFIX) {
+                if (decode_stack_suffix(ts, reader->sample_data, &offset, reader->sample_data_size) < 0) {
+                    return -1;
+                }
+                reader->stats.suffix_records++;
+            } else { /* STACK_POP_PUSH */
+                if (decode_stack_pop_push(ts, reader->sample_data, &offset, reader->sample_data_size) < 0) {
+                    return -1;
+                }
+                reader->stats.pop_push_records++;
+            }
+            reader->stats.stack_reconstructions++;
+
+            /* Build single-element timestamp list */
+            PyObject *ts_obj = PyLong_FromUnsignedLongLong(ts->prev_timestamp);
+            if (!ts_obj) {
+                return -1;
+            }
+            PyObject *timestamps_list = PyList_New(1);
+            if (!timestamps_list) {
+                Py_DECREF(ts_obj);
+                return -1;
+            }
+            PyList_SET_ITEM(timestamps_list, 0, ts_obj);
+
+            if (emit_sample(state, collector, thread_id, interpreter_id, status,
+                            ts->current_stack, ts->current_stack_depth, reader,
+                            timestamps_list) < 0) {
+                Py_DECREF(timestamps_list);
+                return -1;
+            }
+            Py_DECREF(timestamps_list);
+            replayed++;
+            reader->stats.total_samples++;
+            break;
+        }
+
+        default:
+            PyErr_Format(PyExc_ValueError, "Unknown stack encoding: %u", encoding);
+            return -1;
+        }
+
+        /* Progress callback */
+        if (replayed % PROGRESS_CALLBACK_INTERVAL == 0) {
+            if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
+                return -1;
+            }
+        }
+    }
+
+    /* Final progress callback at 100% */
+    if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
+        return -1;
+    }
+
+    return replayed;
+}
+
+/* Build a dict describing the open profile: format and Python versions,
+ * timing, counts, and compression type.
+ * Returns a new reference, or NULL with a Python exception set. */
+PyObject *
+binary_reader_get_info(BinaryReader *reader)
+{
+    PyObject *version_tuple = Py_BuildValue("(B,B,B)",
+        reader->py_major, reader->py_minor, reader->py_micro);
+    if (!version_tuple) {
+        return NULL;
+    }
+    /* "N" transfers ownership of version_tuple into the dict */
+    return Py_BuildValue(
+        "{s:I, s:N, s:K, s:K, s:I, s:I, s:I, s:I, s:i}",
+        "version", BINARY_FORMAT_VERSION,
+        "python_version", version_tuple,
+        "start_time_us", reader->start_time_us,
+        "sample_interval_us", reader->sample_interval_us,
+        "sample_count", reader->sample_count,
+        "thread_count", reader->thread_count,
+        "string_count", reader->strings_count,
+        "frame_count", reader->frames_count,
+        "compression_type", reader->compression_type
+    );
+}
+
+/* Build a stats dict for the writer: per-record-kind counters plus derived
+ * totals and the percentage of frames that delta encoding avoided writing.
+ * Returns a new reference, or NULL with an exception set. */
+PyObject *
+binary_writer_get_stats(BinaryWriter *writer)
+{
+    const BinaryWriterStats *stats = &writer->stats;
+
+    /* Derived totals across all record kinds. */
+    uint64_t record_total = stats->repeat_records + stats->full_records
+                          + stats->suffix_records + stats->pop_push_records;
+    uint64_t sample_total = writer->total_samples;
+
+    /* frames_saved counts frames the encoder did not have to emit; express
+     * it as a percentage of all frames that would otherwise be written. */
+    uint64_t frames_potential = stats->total_frames_written + stats->frames_saved;
+    double saved_pct = 0.0;
+    if (frames_potential > 0) {
+        saved_pct = (double)stats->frames_saved / frames_potential * 100.0;
+    }
+
+    return Py_BuildValue(
+        "{s:K, s:K, s:K, s:K, s:K, s:K, s:K, s:K, s:K, s:K, s:d}",
+        "repeat_records", stats->repeat_records,
+        "repeat_samples", stats->repeat_samples,
+        "full_records", stats->full_records,
+        "suffix_records", stats->suffix_records,
+        "pop_push_records", stats->pop_push_records,
+        "total_records", record_total,
+        "total_samples", sample_total,
+        "total_frames_written", stats->total_frames_written,
+        "frames_saved", stats->frames_saved,
+        "bytes_written", stats->bytes_written,
+        "frame_compression_pct", saved_pct
+    );
+}
+
+/* Build a stats dict for the reader: per-record-kind counters from replay
+ * plus a derived record total. Returns a new reference, or NULL with an
+ * exception set. */
+PyObject *
+binary_reader_get_stats(BinaryReader *reader)
+{
+    const BinaryReaderStats *stats = &reader->stats;
+
+    /* Sum all record kinds encountered during replay. */
+    uint64_t record_total = stats->repeat_records + stats->full_records
+                          + stats->suffix_records + stats->pop_push_records;
+
+    return Py_BuildValue(
+        "{s:K, s:K, s:K, s:K, s:K, s:K, s:K, s:K}",
+        "repeat_records", stats->repeat_records,
+        "repeat_samples", stats->repeat_samples,
+        "full_records", stats->full_records,
+        "suffix_records", stats->suffix_records,
+        "pop_push_records", stats->pop_push_records,
+        "total_records", record_total,
+        "total_samples", stats->total_samples,
+        "stack_reconstructions", stats->stack_reconstructions
+    );
+}
+
+/* Release every resource owned by a BinaryReader and free the reader.
+ *
+ * Safe to call with NULL. Frees the filename copy, unmaps or frees the
+ * file data (depending on the USE_MMAP build configuration), releases the
+ * decompression buffer, drops the string-table references, and frees the
+ * per-thread stack reconstruction state. */
+void
+binary_reader_close(BinaryReader *reader)
+{
+    if (!reader) {
+        return;
+    }
+
+    PyMem_Free(reader->filename);
+
+#if USE_MMAP
+    if (reader->mapped_data) {
+        munmap(reader->mapped_data, reader->mapped_size);
+        reader->mapped_data = NULL; /* Prevent use-after-free */
+        reader->mapped_size = 0;
+    }
+    if (reader->fd >= 0) {
+        close(reader->fd);
+        reader->fd = -1; /* Mark as closed */
+    }
+#else
+    if (reader->fp) {
+        fclose(reader->fp);
+        reader->fp = NULL;
+    }
+    if (reader->file_data) {
+        PyMem_Free(reader->file_data);
+        reader->file_data = NULL;
+        reader->file_size = 0;
+    }
+#endif
+
+    PyMem_Free(reader->decompressed_data);
+
+    if (reader->strings) {
+        /* Py_XDECREF tolerates NULL slots left by a partially-loaded
+         * string table. */
+        for (uint32_t i = 0; i < reader->strings_count; i++) {
+            Py_XDECREF(reader->strings[i]);
+        }
+        PyMem_Free(reader->strings);
+    }
+
+    PyMem_Free(reader->frame_data);
+
+    if (reader->thread_states) {
+        /* Each thread state owns its reconstructed-stack buffer. */
+        for (size_t i = 0; i < reader->thread_state_count; i++) {
+            PyMem_Free(reader->thread_states[i].current_stack);
+        }
+        PyMem_Free(reader->thread_states);
+    }
+
+    PyMem_Free(reader);
+}
--- /dev/null
+/******************************************************************************
+ * Python Remote Debugging Module - Binary Writer Implementation
+ *
+ * High-performance binary file writer for profiling data with optional zstd
+ * streaming compression.
+ ******************************************************************************/
+
+#ifndef Py_BUILD_CORE_MODULE
+# define Py_BUILD_CORE_MODULE
+#endif
+
+#include "binary_io.h"
+#include "_remote_debugging.h"
+#include <string.h>
+
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
+
+/* ============================================================================
+ * CONSTANTS FOR BINARY FORMAT SIZES
+ * ============================================================================ */
+
+/* Sample header sizes */
+#define SAMPLE_HEADER_FIXED_SIZE 13 /* thread_id(8) + interpreter_id(4) + encoding(1) */
+#define SAMPLE_HEADER_MAX_SIZE 26 /* fixed + max_varint(10) + status(1) + margin */
+#define MAX_VARINT_SIZE 10 /* Maximum bytes for a varint64 */
+#define MAX_VARINT_SIZE_U32 5 /* Maximum bytes for a varint32 */
+/* Frame buffer: depth varint (up to MAX_VARINT_SIZE_U32 bytes) + MAX_STACK_DEPTH frames * MAX_VARINT_SIZE_U32 bytes each + margin */
+#define MAX_FRAME_BUFFER_SIZE ((MAX_STACK_DEPTH * MAX_VARINT_SIZE_U32) + MAX_VARINT_SIZE_U32 + 16)
+
+/* File structure sizes */
+#define FILE_FOOTER_SIZE 32
+
+/* ============================================================================
+ * WRITER-SPECIFIC UTILITY HELPERS
+ * ============================================================================ */
+
+/* Grow two parallel arrays together (e.g., strings and string_lengths).
+ *
+ * Doubles *capacity (seeding it to 8 if it was 0, so the function cannot
+ * loop forever on an empty array pair) and moves both arrays into fresh
+ * blocks. Returns 0 on success, -1 on error (sets PyErr).
+ * On error, original arrays are preserved (truly atomic update). */
+static inline int
+grow_parallel_arrays(void **array1, void **array2, size_t *capacity,
+                     size_t elem_size1, size_t elem_size2)
+{
+    size_t old_cap = *capacity;
+
+    if (old_cap > SIZE_MAX / 2) {
+        PyErr_SetString(PyExc_OverflowError, "Array capacity overflow");
+        return -1;
+    }
+    /* A zero capacity doubled would stay zero forever; seed it instead. */
+    size_t new_cap = (old_cap == 0) ? 8 : old_cap * 2;
+
+    if (new_cap > SIZE_MAX / elem_size1 || new_cap > SIZE_MAX / elem_size2) {
+        PyErr_SetString(PyExc_OverflowError, "Array allocation size overflow");
+        return -1;
+    }
+
+    size_t old_size1 = old_cap * elem_size1;
+    size_t old_size2 = old_cap * elem_size2;
+
+    /* Allocate fresh memory blocks (not realloc) to ensure atomicity.
+     * If either allocation fails, original arrays are completely unchanged. */
+    void *new_array1 = PyMem_Malloc(new_cap * elem_size1);
+    if (!new_array1) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    void *new_array2 = PyMem_Malloc(new_cap * elem_size2);
+    if (!new_array2) {
+        /* Second allocation failed - free first and return with no state change */
+        PyMem_Free(new_array1);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* Copy only when there is existing data: memcpy from a NULL source,
+     * even with size 0, is undefined behavior. */
+    if (old_size1 > 0) {
+        memcpy(new_array1, *array1, old_size1);
+    }
+    if (old_size2 > 0) {
+        memcpy(new_array2, *array2, old_size2);
+    }
+
+    PyMem_Free(*array1);
+    PyMem_Free(*array2);
+
+    *array1 = new_array1;
+    *array2 = new_array2;
+    *capacity = new_cap;
+    return 0;
+}
+
+/* fwrite that releases the GIL while blocking in the C library, so other
+ * Python threads keep running during disk I/O.
+ * Returns 0 when all `size` bytes were written, otherwise sets an OSError
+ * from errno and returns -1. */
+static inline int
+fwrite_checked_allow_threads(const void *data, size_t size, FILE *fp)
+{
+    size_t n;
+    Py_BEGIN_ALLOW_THREADS
+    n = fwrite(data, 1, size, fp);
+    Py_END_ALLOW_THREADS
+    if (n == size) {
+        return 0;
+    }
+    PyErr_SetFromErrno(PyExc_IOError);
+    return -1;
+}
+
+/* Forward declaration for writer_write_bytes */
+static inline int writer_write_bytes(BinaryWriter *writer, const void *data, size_t size);
+
+/* Encode and write a varint u32 - returns 0 on success, -1 on error */
+static inline int
+writer_write_varint_u32(BinaryWriter *writer, uint32_t value)
+{
+ uint8_t buf[MAX_VARINT_SIZE];
+ size_t len = encode_varint_u32(buf, value);
+ return writer_write_bytes(writer, buf, len);
+}
+
+/* Variable-length-encode a u64 into a stack buffer and push it through
+ * the buffered writer. Returns 0 on success, -1 on error. */
+static inline int
+writer_write_varint_u64(BinaryWriter *writer, uint64_t value)
+{
+    uint8_t encoded[MAX_VARINT_SIZE];
+    size_t encoded_len = encode_varint_u64(encoded, value);
+    return writer_write_bytes(writer, encoded, encoded_len);
+}
+
+
+/* ============================================================================
+ * UTILITY FUNCTIONS
+ * ============================================================================ */
+
+/* Report whether this build was compiled with zstd support. */
+int
+binary_io_zstd_available(void)
+{
+#ifdef HAVE_ZSTD
+    const int have_zstd = 1;
+#else
+    const int have_zstd = 0;
+#endif
+    return have_zstd;
+}
+
+/* Pick the best compression scheme compiled into this build:
+ * zstd when available, otherwise no compression. */
+int
+binary_io_get_best_compression(void)
+{
+#ifdef HAVE_ZSTD
+    const int best = COMPRESSION_ZSTD;
+#else
+    const int best = COMPRESSION_NONE;
+#endif
+    return best;
+}
+
+/* ============================================================================
+ * BINARY WRITER IMPLEMENTATION
+ * ============================================================================ */
+
+/* Initialize zstd streaming-compression state on the writer.
+ *
+ * Creates a compression context, sets the compression level, and allocates
+ * the staging buffer for compressed output. Returns 0 on success, -1 with
+ * a Python exception set on failure - including when the module was built
+ * without zstd support (HAVE_ZSTD undefined). */
+static int
+writer_init_zstd(BinaryWriter *writer)
+{
+#ifdef HAVE_ZSTD
+    writer->zstd.cctx = ZSTD_createCCtx();
+    if (!writer->zstd.cctx) {
+        PyErr_SetString(PyExc_MemoryError, "Failed to create zstd compression context");
+        return -1;
+    }
+
+    /* Compression level 5: better ratio for repetitive profiling data */
+    size_t result = ZSTD_CCtx_setParameter(writer->zstd.cctx,
+                                           ZSTD_c_compressionLevel, 5);
+    if (ZSTD_isError(result)) {
+        PyErr_Format(PyExc_RuntimeError, "Failed to set zstd compression level: %s",
+                     ZSTD_getErrorName(result));
+        ZSTD_freeCCtx(writer->zstd.cctx);
+        writer->zstd.cctx = NULL;
+        return -1;
+    }
+
+    /* Use large buffer (512KB) for fewer I/O syscalls */
+    writer->zstd.compressed_buffer = PyMem_Malloc(COMPRESSED_BUFFER_SIZE);
+    if (!writer->zstd.compressed_buffer) {
+        /* Unwind the context so close/destroy paths see consistent state. */
+        ZSTD_freeCCtx(writer->zstd.cctx);
+        writer->zstd.cctx = NULL;
+        PyErr_NoMemory();
+        return -1;
+    }
+    writer->zstd.compressed_buffer_size = COMPRESSED_BUFFER_SIZE;
+
+    return 0;
+#else
+    PyErr_SetString(PyExc_RuntimeError,
+        "zstd compression requested but not available (HAVE_ZSTD not defined)");
+    return -1;
+#endif
+}
+
+/* Drain the writer's in-memory buffer to the output file.
+ *
+ * With zstd compression active, the buffered bytes are fed through the
+ * streaming compressor (ZSTD_e_continue: data may be retained inside the
+ * compressor until the stream is finalized) and any produced output is
+ * written to disk. Otherwise the raw bytes are written directly.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static int
+writer_flush_buffer(BinaryWriter *writer)
+{
+    if (writer->buffer_pos == 0) {
+        return 0;
+    }
+
+#ifdef HAVE_ZSTD
+    if (writer->compression_type == COMPRESSION_ZSTD) {
+        ZSTD_inBuffer input = { writer->write_buffer, writer->buffer_pos, 0 };
+
+        /* Loop until the compressor has consumed the whole input; each
+         * pass may emit up to one output buffer's worth of data. */
+        while (input.pos < input.size) {
+            ZSTD_outBuffer output = {
+                writer->zstd.compressed_buffer,
+                writer->zstd.compressed_buffer_size,
+                0
+            };
+
+            size_t result = ZSTD_compressStream2(
+                writer->zstd.cctx, &output, &input, ZSTD_e_continue
+            );
+
+            if (ZSTD_isError(result)) {
+                PyErr_Format(PyExc_IOError, "zstd compression error: %s",
+                             ZSTD_getErrorName(result));
+                return -1;
+            }
+
+            if (output.pos > 0) {
+                if (fwrite_checked_allow_threads(writer->zstd.compressed_buffer, output.pos, writer->fp) < 0) {
+                    return -1;
+                }
+            }
+        }
+    } else
+#endif
+    {
+        if (fwrite_checked_allow_threads(writer->write_buffer, writer->buffer_pos, writer->fp) < 0) {
+            return -1;
+        }
+    }
+
+    writer->buffer_pos = 0;
+    return 0;
+}
+
+/* Append `size` bytes to the writer's buffer, flushing to the file (and
+ * through the compressor, when active) every time the buffer fills.
+ * stats.bytes_written counts logical (pre-compression) bytes.
+ * Returns 0 on success, -1 with a Python exception set. */
+static inline int
+writer_write_bytes(BinaryWriter *writer, const void *data, size_t size)
+{
+    const uint8_t *cursor = (const uint8_t *)data;
+    size_t remaining = size;
+
+    while (remaining > 0) {
+        size_t room = writer->buffer_size - writer->buffer_pos;
+        size_t chunk = (remaining < room) ? remaining : room;
+
+        memcpy(writer->write_buffer + writer->buffer_pos, cursor, chunk);
+        writer->buffer_pos += chunk;
+        cursor += chunk;
+        remaining -= chunk;
+
+        if (writer->buffer_pos == writer->buffer_size
+            && writer_flush_buffer(writer) < 0) {
+            return -1;
+        }
+    }
+
+    writer->stats.bytes_written += size;
+    return 0;
+}
+
+/* ============================================================================
+ * HASH TABLE SUPPORT FUNCTIONS (using _Py_hashtable)
+ * ============================================================================ */
+
+/* _Py_hashtable hash callback for Python string keys: delegate to
+ * Python's own (cached) hash, mapping the -1 error sentinel to 0 since
+ * the hash table cannot propagate exceptions. */
+static Py_uhash_t
+string_hash_func(const void *key)
+{
+    Py_hash_t h = PyObject_Hash((PyObject *)key);
+    if (h == -1) {
+        PyErr_Clear();
+        h = 0;
+    }
+    return (Py_uhash_t)h;
+}
+
+/* _Py_hashtable equality callback for Python string keys.
+ * Returns 1 when equal, 0 otherwise; a comparison that raises is treated
+ * as "not equal" because the hash table cannot propagate exceptions. */
+static int
+string_compare_func(const void *key1, const void *key2)
+{
+    PyObject *a = (PyObject *)key1;
+    PyObject *b = (PyObject *)key2;
+
+    /* Fast path: identical objects are always equal. */
+    if (a == b) {
+        return 1;
+    }
+
+    int eq = PyObject_RichCompareBool(a, b, Py_EQ);
+    if (eq < 0) {
+        PyErr_Clear();
+        eq = 0;
+    }
+    return eq;
+}
+
+/* _Py_hashtable key-destroy callback: drop the strong reference taken
+ * when the string was interned. */
+static void
+string_key_destroy(void *key)
+{
+    PyObject *str = (PyObject *)key;
+    Py_XDECREF(str);
+}
+
+/* _Py_hashtable hash callback for FrameKey: FNV-1a-style mix of the
+ * filename index, function-name index, and line number. */
+static Py_uhash_t
+frame_key_hash_func(const void *key)
+{
+    const FrameKey *fk = (const FrameKey *)key;
+    const Py_uhash_t fnv_prime = 16777619u;
+    Py_uhash_t h = 2166136261u;    /* FNV offset basis */
+    h = (h ^ fk->filename_idx) * fnv_prime;
+    h = (h ^ fk->funcname_idx) * fnv_prime;
+    h = (h ^ (uint32_t)fk->lineno) * fnv_prime;
+    return h;
+}
+
+/* _Py_hashtable equality callback for FrameKey: all three fields must
+ * match. Line number is checked first as the cheapest discriminator. */
+static int
+frame_key_compare_func(const void *key1, const void *key2)
+{
+    const FrameKey *a = (const FrameKey *)key1;
+    const FrameKey *b = (const FrameKey *)key2;
+    if (a->lineno != b->lineno) {
+        return 0;
+    }
+    return a->filename_idx == b->filename_idx
+        && a->funcname_idx == b->funcname_idx;
+}
+
+/* _Py_hashtable key-destroy callback: keys are heap-allocated FrameKey
+ * structs owned by the table. */
+static void
+frame_key_destroy(void *key)
+{
+    PyMem_Free(key);
+}
+
+/* Return (via *index) the string-table index for `string`, interning it
+ * on first use.
+ *
+ * Lookups go through writer->string_hash (values store index+1 so index 0
+ * is distinguishable from a NULL miss). A new string is copied as UTF-8
+ * into writer->strings / writer->string_lengths, and a strong reference
+ * to the Python object is kept as the hash-table key.
+ * Returns 0 on success, -1 with an exception set on failure; on failure
+ * no partial state is left behind. */
+static inline int
+writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
+{
+    void *existing = _Py_hashtable_get(writer->string_hash, string);
+    if (existing != NULL) {
+        *index = (uint32_t)(uintptr_t)existing - 1; /* index+1 stored to distinguish from NULL */
+        return 0;
+    }
+
+    if (writer->string_count >= writer->string_capacity) {
+        if (grow_parallel_arrays((void **)&writer->strings,
+                                 (void **)&writer->string_lengths,
+                                 &writer->string_capacity,
+                                 sizeof(char *), sizeof(size_t)) < 0) {
+            return -1;
+        }
+    }
+
+    Py_ssize_t str_len;
+    const char *str_data = PyUnicode_AsUTF8AndSize(string, &str_len);
+    if (!str_data) {
+        return -1;
+    }
+
+    /* +1 copies the NUL terminator along with the UTF-8 payload. */
+    char *str_copy = PyMem_Malloc(str_len + 1);
+    if (!str_copy) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    memcpy(str_copy, str_data, str_len + 1);
+
+    *index = (uint32_t)writer->string_count;
+
+    /* Add to hash table FIRST to ensure atomic rollback on failure:
+     * the array slots are only written once the table insert succeeded. */
+    Py_INCREF(string);
+    if (_Py_hashtable_set(writer->string_hash, string, (void *)(uintptr_t)(*index + 1)) < 0) {
+        Py_DECREF(string);
+        PyMem_Free(str_copy);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    writer->strings[writer->string_count] = str_copy;
+    writer->string_lengths[writer->string_count] = str_len;
+    writer->string_count++;
+
+    return 0;
+}
+
+/* Return (via *index) the frame-table index for the (filename, funcname,
+ * lineno) triple, interning a new FrameEntry on first use.
+ *
+ * Deduplicates through writer->frame_hash with the same index+1 value
+ * convention as writer_intern_string. Returns 0 on success, -1 with an
+ * exception set; on failure no partial state is left behind (the array
+ * may have grown, but frame_count is unchanged). */
+static inline int
+writer_intern_frame(BinaryWriter *writer, uint32_t filename_idx, uint32_t funcname_idx,
+                    int32_t lineno, uint32_t *index)
+{
+    FrameKey lookup_key = {filename_idx, funcname_idx, lineno};
+
+    void *existing = _Py_hashtable_get(writer->frame_hash, &lookup_key);
+    if (existing != NULL) {
+        *index = (uint32_t)(uintptr_t)existing - 1; /* index+1 stored to distinguish from NULL */
+        return 0;
+    }
+
+    if (GROW_ARRAY(writer->frame_entries, writer->frame_count,
+                   writer->frame_capacity, FrameEntry) < 0) {
+        return -1;
+    }
+
+    /* The hash table needs a stable heap copy of the key; it owns it via
+     * frame_key_destroy once inserted. */
+    FrameKey *key = PyMem_Malloc(sizeof(FrameKey));
+    if (!key) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    *key = lookup_key;
+
+    *index = (uint32_t)writer->frame_count;
+    FrameEntry *fe = &writer->frame_entries[writer->frame_count];
+    fe->filename_idx = filename_idx;
+    fe->funcname_idx = funcname_idx;
+    fe->lineno = lineno;
+
+    if (_Py_hashtable_set(writer->frame_hash, key, (void *)(uintptr_t)(*index + 1)) < 0) {
+        PyMem_Free(key);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    writer->frame_count++;
+    return 0;
+}
+
+/* Get or create a thread entry for the given (thread_id, interpreter_id).
+ * Returns pointer to ThreadEntry, or NULL on allocation failure (with a
+ * Python exception set).
+ * If is_new is non-NULL, sets it to 1 if this is a new thread, 0 otherwise.
+ *
+ * New entries start with prev_timestamp = writer->start_time_us so the
+ * first sample's timestamp delta is measured from profiling start, and
+ * with an empty previous stack (zeroed by memset). */
+static ThreadEntry *
+writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
+                                  uint32_t interpreter_id, int *is_new)
+{
+    /* Linear search is OK for small number of threads.
+     * Key is (thread_id, interpreter_id) since same thread_id can exist in different interpreters. */
+    for (size_t i = 0; i < writer->thread_count; i++) {
+        if (writer->thread_entries[i].thread_id == thread_id &&
+            writer->thread_entries[i].interpreter_id == interpreter_id) {
+            if (is_new) {
+                *is_new = 0;
+            }
+            return &writer->thread_entries[i];
+        }
+    }
+
+    if (writer->thread_count >= writer->thread_capacity) {
+        ThreadEntry *new_entries = grow_array(writer->thread_entries,
+                                              &writer->thread_capacity,
+                                              sizeof(ThreadEntry));
+        if (!new_entries) {
+            return NULL;
+        }
+        writer->thread_entries = new_entries;
+    }
+
+    ThreadEntry *entry = &writer->thread_entries[writer->thread_count];
+    memset(entry, 0, sizeof(ThreadEntry));
+    entry->thread_id = thread_id;
+    entry->interpreter_id = interpreter_id;
+    entry->prev_timestamp = writer->start_time_us;
+    entry->prev_stack_capacity = MAX_STACK_DEPTH;
+    entry->pending_rle_capacity = INITIAL_RLE_CAPACITY;
+
+    entry->prev_stack = PyMem_Malloc(entry->prev_stack_capacity * sizeof(uint32_t));
+    if (!entry->prev_stack) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    entry->pending_rle = PyMem_Malloc(entry->pending_rle_capacity * sizeof(PendingRLESample));
+    if (!entry->pending_rle) {
+        /* thread_count is not incremented, so this half-built slot is never
+         * treated as live; just release what was allocated. */
+        PyMem_Free(entry->prev_stack);
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    writer->thread_count++;
+    if (is_new) {
+        *is_new = 1;
+    }
+    return entry;
+}
+
+/* Compare the previous and current stack and pick the cheapest encoding.
+ *
+ * Stacks are stored innermost-first, so the stable "bottom" of the call
+ * stack is the common suffix at the end of each array. Outputs:
+ *   - shared_count: frames matching from the bottom of the stack
+ *   - pop_count:    frames to remove from the previous stack
+ *   - push_count:   new frames to add
+ * Returns the chosen STACK_* encoding. */
+static int
+compare_stacks(const uint32_t *prev_stack, size_t prev_depth,
+               const uint32_t *curr_stack, size_t curr_depth,
+               size_t *shared_count, size_t *pop_count, size_t *push_count)
+{
+    /* Exact repeat: equal depth and element-wise identical. */
+    if (prev_depth == curr_depth) {
+        size_t i = 0;
+        while (i < prev_depth && prev_stack[i] == curr_stack[i]) {
+            i++;
+        }
+        if (i == prev_depth) {
+            *shared_count = prev_depth;
+            *pop_count = 0;
+            *push_count = 0;
+            return STACK_REPEAT;
+        }
+    }
+
+    /* Longest common suffix (outer frames), scanning from the end. */
+    size_t min_depth = (prev_depth < curr_depth) ? prev_depth : curr_depth;
+    size_t common = 0;
+    while (common < min_depth
+           && prev_stack[prev_depth - 1 - common] == curr_stack[curr_depth - 1 - common]) {
+        common++;
+    }
+
+    *shared_count = common;
+    *pop_count = prev_depth - common;
+    *push_count = curr_depth - common;
+
+    /* Byte-cost intuition:
+     *   STACK_FULL:     type + depth varint + every frame varint
+     *   STACK_SUFFIX:   type + shared varint + new-count varint + new frames
+     *   STACK_POP_PUSH: type + pop varint + push varint + new frames */
+
+    /* Nothing in common: a delta encoding cannot win. */
+    if (common == 0) {
+        return STACK_FULL;
+    }
+
+    /* Pure growth (the whole previous stack is kept): SUFFIX. */
+    if (*pop_count == 0 && *push_count > 0) {
+        return STACK_SUFFIX;
+    }
+
+    /* Mixed pop/push: only worth it when at least half of the current
+     * stack is shared; otherwise a full record is as cheap and simpler. */
+    if ((*pop_count > 0 || *push_count > 0) && common >= curr_depth / 2) {
+        return STACK_POP_PUSH;
+    }
+
+    return STACK_FULL;
+}
+
+/* Write the fixed sample-record prefix:
+ * thread_id (8, host byte order) | interpreter_id (4) | encoding (1).
+ * Returns 0 on success, -1 on failure. */
+static inline int
+write_sample_header(BinaryWriter *writer, ThreadEntry *entry, uint8_t encoding)
+{
+    uint8_t hdr[SAMPLE_HEADER_FIXED_SIZE];
+    memcpy(hdr, &entry->thread_id, sizeof(uint64_t));
+    memcpy(hdr + sizeof(uint64_t), &entry->interpreter_id, sizeof(uint32_t));
+    hdr[sizeof(uint64_t) + sizeof(uint32_t)] = encoding;
+    return writer_write_bytes(writer, hdr, SAMPLE_HEADER_FIXED_SIZE);
+}
+
+/* Flush the buffered run of identical-stack samples for a thread as a
+ * single STACK_REPEAT record. Returns 0 on success, -1 on failure.
+ *
+ * Record layout:
+ *   [thread_id:8][interpreter_id:4][STACK_REPEAT:1][count:varint]
+ *   then per sample: [timestamp_delta:varint][status:1] */
+static int
+flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
+{
+    size_t count = entry->pending_rle_count;
+    if (!entry->has_pending_rle || count == 0) {
+        return 0;
+    }
+
+    if (write_sample_header(writer, entry, STACK_REPEAT) < 0) {
+        return -1;
+    }
+    if (writer_write_varint_u32(writer, (uint32_t)count) < 0) {
+        return -1;
+    }
+
+    for (size_t idx = 0; idx < count; idx++) {
+        PendingRLESample *sample = &entry->pending_rle[idx];
+        if (writer_write_varint_u64(writer, sample->timestamp_delta) < 0) {
+            return -1;
+        }
+        if (writer_write_bytes(writer, &sample->status, 1) < 0) {
+            return -1;
+        }
+        writer->total_samples++;
+    }
+
+    writer->stats.repeat_records++;
+    writer->stats.repeat_samples += count;
+    /* Every repeated sample avoided re-writing the whole previous stack. */
+    writer->stats.frames_saved += count * entry->prev_stack_depth;
+
+    entry->pending_rle_count = 0;
+    entry->has_pending_rle = 0;
+
+    return 0;
+}
+
+/* Write a single (non-RLE) sample record with the specified encoding.
+ *
+ * Record layout: thread_id(8) + interpreter_id(4) + encoding(1) +
+ * timestamp_delta(varint) + status(1), followed by an encoding-specific
+ * frame section built in a stack buffer. Updates writer statistics and
+ * total_samples. Returns 0 on success, -1 on failure.
+ * Note: STACK_REPEAT is not valid here - repeats go through
+ * flush_pending_rle instead, so the switch rejects it. */
+static int
+write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
+                           uint64_t timestamp_delta, uint8_t status,
+                           int encoding_type,
+                           const uint32_t *frame_indices, size_t stack_depth,
+                           size_t shared_count, size_t pop_count, size_t push_count)
+{
+    /* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1).
+     * Worst case is 13 + MAX_VARINT_SIZE + 1 = 24 bytes, within
+     * SAMPLE_HEADER_MAX_SIZE (26). */
+    uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
+    memcpy(header_buf, &entry->thread_id, 8);
+    memcpy(header_buf + 8, &entry->interpreter_id, 4);
+    header_buf[12] = (uint8_t)encoding_type;
+    size_t varint_len = encode_varint_u64(header_buf + 13, timestamp_delta);
+    header_buf[13 + varint_len] = status;
+
+    if (writer_write_bytes(writer, header_buf, 14 + varint_len) < 0) {
+        return -1;
+    }
+
+    uint8_t frame_buf[MAX_FRAME_BUFFER_SIZE];
+    size_t frame_buf_pos = 0;
+    size_t frames_written = 0;
+
+    switch (encoding_type) {
+    case STACK_FULL:
+        /* [depth: varint] [frame_idx: varint]... */
+        frame_buf_pos += encode_varint_u32(frame_buf, (uint32_t)stack_depth);
+        for (size_t i = 0; i < stack_depth; i++) {
+            frame_buf_pos += encode_varint_u32(frame_buf + frame_buf_pos, frame_indices[i]);
+        }
+        frames_written = stack_depth;
+        writer->stats.full_records++;
+        break;
+
+    case STACK_SUFFIX:
+        /* [shared_count: varint] [new_count: varint] [new_frame_idx: varint]... */
+        frame_buf_pos += encode_varint_u32(frame_buf, (uint32_t)shared_count);
+        frame_buf_pos += encode_varint_u32(frame_buf + frame_buf_pos, (uint32_t)push_count);
+        /* New frames are at the top (beginning) of current stack */
+        for (size_t i = 0; i < push_count; i++) {
+            frame_buf_pos += encode_varint_u32(frame_buf + frame_buf_pos, frame_indices[i]);
+        }
+        frames_written = push_count;
+        writer->stats.suffix_records++;
+        /* Saved writing shared_count frames */
+        writer->stats.frames_saved += shared_count;
+        break;
+
+    case STACK_POP_PUSH:
+        /* [pop_count: varint] [push_count: varint] [new_frame_idx: varint]... */
+        frame_buf_pos += encode_varint_u32(frame_buf, (uint32_t)pop_count);
+        frame_buf_pos += encode_varint_u32(frame_buf + frame_buf_pos, (uint32_t)push_count);
+        /* New frames are at the top (beginning) of current stack */
+        for (size_t i = 0; i < push_count; i++) {
+            frame_buf_pos += encode_varint_u32(frame_buf + frame_buf_pos, frame_indices[i]);
+        }
+        frames_written = push_count;
+        writer->stats.pop_push_records++;
+        /* Saved writing shared_count frames (stack_depth - push_count if we had written full) */
+        writer->stats.frames_saved += shared_count;
+        break;
+
+    default:
+        PyErr_SetString(PyExc_RuntimeError, "Invalid stack encoding type");
+        return -1;
+    }
+
+    if (writer_write_bytes(writer, frame_buf, frame_buf_pos) < 0) {
+        return -1;
+    }
+
+    writer->stats.total_frames_written += frames_written;
+    writer->total_samples++;
+    return 0;
+}
+
+/* Create a BinaryWriter and open `filename` for writing.
+ *
+ * Allocates the write buffer, string/frame intern tables, and per-thread
+ * state, optionally initializes zstd streaming compression, opens the file,
+ * and reserves a zeroed header that is back-filled at finalize time.
+ * Returns the writer, or NULL with a Python exception set.
+ *
+ * Fix: every allocation/creation failure path now sets an exception before
+ * jumping to the cleanup label; previously several `goto error` paths
+ * returned NULL with no exception set. */
+BinaryWriter *
+binary_writer_create(const char *filename, uint64_t sample_interval_us, int compression_type,
+                     uint64_t start_time_us)
+{
+    BinaryWriter *writer = PyMem_Calloc(1, sizeof(BinaryWriter));
+    if (!writer) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    writer->filename = PyMem_Malloc(strlen(filename) + 1);
+    if (!writer->filename) {
+        PyMem_Free(writer);
+        PyErr_NoMemory();
+        return NULL;
+    }
+    strcpy(writer->filename, filename);
+
+    writer->start_time_us = start_time_us;
+    writer->sample_interval_us = sample_interval_us;
+    writer->compression_type = compression_type;
+
+    writer->write_buffer = PyMem_Malloc(WRITE_BUFFER_SIZE);
+    if (!writer->write_buffer) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->buffer_size = WRITE_BUFFER_SIZE;
+
+    writer->string_hash = _Py_hashtable_new_full(
+        string_hash_func,
+        string_compare_func,
+        string_key_destroy,   /* Key destroy: decref the Python string */
+        NULL,                 /* Value destroy: values are just indices, not pointers */
+        NULL                  /* Use default allocator */
+    );
+    if (!writer->string_hash) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->strings = PyMem_Malloc(INITIAL_STRING_CAPACITY * sizeof(char *));
+    if (!writer->strings) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->string_lengths = PyMem_Malloc(INITIAL_STRING_CAPACITY * sizeof(size_t));
+    if (!writer->string_lengths) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->string_capacity = INITIAL_STRING_CAPACITY;
+
+    writer->frame_hash = _Py_hashtable_new_full(
+        frame_key_hash_func,
+        frame_key_compare_func,
+        frame_key_destroy,    /* Key destroy: free the FrameKey */
+        NULL,                 /* Value destroy: values are just indices, not pointers */
+        NULL                  /* Use default allocator */
+    );
+    if (!writer->frame_hash) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->frame_entries = PyMem_Malloc(INITIAL_FRAME_CAPACITY * sizeof(FrameEntry));
+    if (!writer->frame_entries) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->frame_capacity = INITIAL_FRAME_CAPACITY;
+
+    writer->thread_entries = PyMem_Malloc(INITIAL_THREAD_CAPACITY * sizeof(ThreadEntry));
+    if (!writer->thread_entries) {
+        PyErr_NoMemory();
+        goto error;
+    }
+    writer->thread_capacity = INITIAL_THREAD_CAPACITY;
+
+    if (compression_type == COMPRESSION_ZSTD) {
+        /* writer_init_zstd sets its own exception on failure. */
+        if (writer_init_zstd(writer) < 0) {
+            goto error;
+        }
+    }
+
+    writer->fp = fopen(filename, "wb");
+    if (!writer->fp) {
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+        goto error;
+    }
+
+    /* Hint sequential write pattern to kernel for better I/O scheduling */
+#if defined(__linux__) && defined(POSIX_FADV_SEQUENTIAL)
+    {
+        int fd = fileno(writer->fp);
+        if (fd >= 0) {
+            (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+        }
+    }
+#endif
+
+    /* Reserve space for the header; real values are written at finalize. */
+    uint8_t header[FILE_HEADER_PLACEHOLDER_SIZE] = {0};
+    if (fwrite_checked_allow_threads(header, FILE_HEADER_PLACEHOLDER_SIZE, writer->fp) < 0) {
+        goto error;
+    }
+
+    return writer;
+
+error:
+    binary_writer_destroy(writer);
+    return NULL;
+}
+
+/* Build a frame stack from a Python frame list by interning all strings
+ * and frames; writes at most MAX_STACK_DEPTH frame indices to curr_stack
+ * and the effective depth to *curr_depth.
+ * Returns 0 on success, -1 on error (exception set).
+ *
+ * Fix: a PyList_Size failure (-1) was previously cast to a huge size_t
+ * depth, which the caller would then use for comparisons and memcpy;
+ * now it is detected and propagated as an error. */
+static int
+build_frame_stack(BinaryWriter *writer, PyObject *frame_list,
+                  uint32_t *curr_stack, size_t *curr_depth)
+{
+    Py_ssize_t stack_depth = PyList_Size(frame_list);
+    if (stack_depth < 0) {
+        /* frame_list was not a list; exception already set. */
+        return -1;
+    }
+    *curr_depth = (stack_depth < MAX_STACK_DEPTH) ? stack_depth : MAX_STACK_DEPTH;
+
+    for (Py_ssize_t k = 0; k < (Py_ssize_t)*curr_depth; k++) {
+        /* Use unchecked accessors since we control the data structures */
+        PyObject *frame_info = PyList_GET_ITEM(frame_list, k);
+
+        /* Get filename, location, funcname from FrameInfo using unchecked access */
+        PyObject *filename = PyStructSequence_GET_ITEM(frame_info, 0);
+        PyObject *location = PyStructSequence_GET_ITEM(frame_info, 1);
+        PyObject *funcname = PyStructSequence_GET_ITEM(frame_info, 2);
+
+        /* Extract lineno from location (can be None for synthetic frames) */
+        int32_t lineno = 0;
+        if (location != Py_None) {
+            /* Use unchecked access - first element is lineno */
+            PyObject *lineno_obj = PyTuple_Check(location) ?
+                PyTuple_GET_ITEM(location, 0) :
+                PyStructSequence_GET_ITEM(location, 0);
+            lineno = (int32_t)PyLong_AsLong(lineno_obj);
+            if (UNLIKELY(PyErr_Occurred() != NULL)) {
+                /* Non-integer lineno: treat as "no line", best effort. */
+                PyErr_Clear();
+                lineno = 0;
+            }
+        }
+
+        /* Intern filename */
+        uint32_t filename_idx;
+        if (writer_intern_string(writer, filename, &filename_idx) < 0) {
+            return -1;
+        }
+
+        /* Intern funcname */
+        uint32_t funcname_idx;
+        if (writer_intern_string(writer, funcname, &funcname_idx) < 0) {
+            return -1;
+        }
+
+        /* Intern frame */
+        uint32_t frame_idx;
+        if (writer_intern_frame(writer, filename_idx, funcname_idx, lineno, &frame_idx) < 0) {
+            return -1;
+        }
+
+        curr_stack[k] = frame_idx;
+    }
+    return 0;
+}
+
+/* Process a single thread's sample: decode (thread_id, status, frames),
+ * intern the stack, and either buffer the sample for run-length encoding
+ * (unchanged stack) or write a delta-encoded record.
+ * Returns 0 on success, -1 on error (exception set).
+ *
+ * Fix: for a brand-new thread whose first stack is empty, compare_stacks
+ * reports STACK_REPEAT (it matches the zero-initialized previous stack),
+ * but write_sample_with_encoding rejects STACK_REPEAT as invalid; such
+ * samples are now emitted as STACK_FULL records. */
+static int
+process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
+                      uint32_t interpreter_id, uint64_t timestamp_us)
+{
+    PyObject *thread_id_obj = PyStructSequence_GET_ITEM(thread_info, 0);
+    PyObject *status_obj = PyStructSequence_GET_ITEM(thread_info, 1);
+    PyObject *frame_list = PyStructSequence_GET_ITEM(thread_info, 2);
+
+    uint64_t thread_id = PyLong_AsUnsignedLongLong(thread_id_obj);
+    if (thread_id == (uint64_t)-1 && PyErr_Occurred()) {
+        return -1;
+    }
+    long status_long = PyLong_AsLong(status_obj);
+    if (status_long == -1 && PyErr_Occurred()) {
+        return -1;
+    }
+    uint8_t status = (uint8_t)status_long;
+
+    int is_new_thread = 0;
+    ThreadEntry *entry = writer_get_or_create_thread_entry(
+        writer, thread_id, interpreter_id, &is_new_thread);
+    if (!entry) {
+        return -1;
+    }
+
+    /* Calculate timestamp delta (for a new thread, relative to profiling
+     * start, since prev_timestamp is initialized to start_time_us). */
+    uint64_t delta = timestamp_us - entry->prev_timestamp;
+    entry->prev_timestamp = timestamp_us;
+
+    /* Process frames and build current stack */
+    uint32_t curr_stack[MAX_STACK_DEPTH];
+    size_t curr_depth;
+    if (build_frame_stack(writer, frame_list, curr_stack, &curr_depth) < 0) {
+        return -1;
+    }
+
+    /* Compare with previous stack to determine encoding */
+    size_t shared_count, pop_count, push_count;
+    int encoding = compare_stacks(
+        entry->prev_stack, entry->prev_stack_depth,
+        curr_stack, curr_depth,
+        &shared_count, &pop_count, &push_count);
+
+    if (encoding == STACK_REPEAT && !is_new_thread) {
+        /* Buffer this sample for RLE */
+        if (GROW_ARRAY(entry->pending_rle, entry->pending_rle_count,
+                       entry->pending_rle_capacity, PendingRLESample) < 0) {
+            return -1;
+        }
+        entry->pending_rle[entry->pending_rle_count].timestamp_delta = delta;
+        entry->pending_rle[entry->pending_rle_count].status = status;
+        entry->pending_rle_count++;
+        entry->has_pending_rle = 1;
+    } else {
+        /* Stack changed - flush any pending RLE first */
+        if (entry->has_pending_rle) {
+            if (flush_pending_rle(writer, entry) < 0) {
+                return -1;
+            }
+        }
+
+        if (encoding == STACK_REPEAT) {
+            /* Only reachable for a new thread whose first stack matches the
+             * zero-initialized previous stack (i.e. an empty stack); REPEAT
+             * records are only valid after a real stack, so emit FULL. */
+            encoding = STACK_FULL;
+        }
+
+        if (write_sample_with_encoding(writer, entry, delta, status, encoding,
+                                       curr_stack, curr_depth,
+                                       shared_count, pop_count, push_count) < 0) {
+            return -1;
+        }
+
+        memcpy(entry->prev_stack, curr_stack, curr_depth * sizeof(uint32_t));
+        entry->prev_stack_depth = curr_depth;
+    }
+
+    return 0;
+}
+
+/* Record one profiling sample taken at timestamp_us.
+ *
+ * stack_frames is a list of per-interpreter struct sequences of
+ * (interpreter_id, thread_list); each thread entry is handed to
+ * process_thread_sample. Returns 0 on success, -1 with an exception set. */
+int
+binary_writer_write_sample(BinaryWriter *writer, PyObject *stack_frames, uint64_t timestamp_us)
+{
+    if (!PyList_Check(stack_frames)) {
+        PyErr_SetString(PyExc_TypeError, "stack_frames must be a list");
+        return -1;
+    }
+
+    Py_ssize_t interp_count = PyList_GET_SIZE(stack_frames);
+    for (Py_ssize_t interp = 0; interp < interp_count; interp++) {
+        PyObject *interp_info = PyList_GET_ITEM(stack_frames, interp);
+
+        PyObject *interp_id_obj = PyStructSequence_GET_ITEM(interp_info, 0);
+        PyObject *threads = PyStructSequence_GET_ITEM(interp_info, 1);
+
+        unsigned long raw_id = PyLong_AsUnsignedLong(interp_id_obj);
+        if (raw_id == (unsigned long)-1 && PyErr_Occurred()) {
+            return -1;
+        }
+        /* Bounds check: interpreter_id is stored as uint32_t in binary format */
+        if (raw_id > UINT32_MAX) {
+            PyErr_Format(PyExc_OverflowError,
+                         "interpreter_id %lu exceeds maximum value %lu",
+                         raw_id, (unsigned long)UINT32_MAX);
+            return -1;
+        }
+
+        Py_ssize_t thread_count = PyList_GET_SIZE(threads);
+        for (Py_ssize_t t = 0; t < thread_count; t++) {
+            PyObject *thread_info = PyList_GET_ITEM(threads, t);
+            if (process_thread_sample(writer, thread_info,
+                                      (uint32_t)raw_id, timestamp_us) < 0) {
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int
+binary_writer_finalize(BinaryWriter *writer)
+{
+    /* Complete the output file: flush pending per-thread RLE runs and the
+     * write buffer, finish the zstd stream (if active), then append the
+     * string table, frame table and footer, rewrite the header with the
+     * final offsets/counts, and close the file.
+     * Returns 0 on success, -1 with a Python exception set on failure. */
+    for (size_t i = 0; i < writer->thread_count; i++) {
+        if (writer->thread_entries[i].has_pending_rle) {
+            if (flush_pending_rle(writer, &writer->thread_entries[i]) < 0) {
+                return -1;
+            }
+        }
+    }
+
+    if (writer_flush_buffer(writer) < 0) {
+        return -1;
+    }
+
+#ifdef HAVE_ZSTD
+    /* Finalize compression stream */
+    if (writer->compression_type == COMPRESSION_ZSTD && writer->zstd.cctx) {
+        ZSTD_inBuffer input = { NULL, 0, 0 };
+        size_t remaining;
+
+        /* ZSTD_e_end: keep calling until the encoder reports the epilogue
+         * is fully flushed (remaining == 0). */
+        do {
+            ZSTD_outBuffer output = {
+                writer->zstd.compressed_buffer,
+                writer->zstd.compressed_buffer_size,
+                0
+            };
+
+            remaining = ZSTD_compressStream2(writer->zstd.cctx, &output, &input, ZSTD_e_end);
+
+            if (ZSTD_isError(remaining)) {
+                PyErr_Format(PyExc_IOError, "zstd finalization error: %s",
+                             ZSTD_getErrorName(remaining));
+                return -1;
+            }
+
+            if (output.pos > 0) {
+                if (fwrite_checked_allow_threads(writer->zstd.compressed_buffer, output.pos, writer->fp) < 0) {
+                    return -1;
+                }
+            }
+        } while (remaining > 0);
+    }
+#endif
+
+    /* Use 64-bit file position for >2GB files */
+    file_offset_t string_table_offset = FTELL64(writer->fp);
+    if (string_table_offset < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    /* String table: one entry per interned string, each a varint length
+     * followed by the raw bytes.
+     * Release GIL during potentially large writes */
+    for (size_t i = 0; i < writer->string_count; i++) {
+        uint8_t len_buf[10];
+        size_t len_size = encode_varint_u32(len_buf, (uint32_t)writer->string_lengths[i]);
+        if (fwrite_checked_allow_threads(len_buf, len_size, writer->fp) < 0 ||
+            fwrite_checked_allow_threads(writer->strings[i], writer->string_lengths[i], writer->fp) < 0) {
+            return -1;
+        }
+    }
+
+    file_offset_t frame_table_offset = FTELL64(writer->fp);
+    if (frame_table_offset < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    /* Frame table: (filename_idx, funcname_idx, lineno) varint triples. */
+    for (size_t i = 0; i < writer->frame_count; i++) {
+        FrameEntry *entry = &writer->frame_entries[i];
+        uint8_t buf[30];
+        size_t pos = encode_varint_u32(buf, entry->filename_idx);
+        pos += encode_varint_u32(buf + pos, entry->funcname_idx);
+        pos += encode_varint_i32(buf + pos, entry->lineno);
+        if (fwrite_checked_allow_threads(buf, pos, writer->fp) < 0) {
+            return -1;
+        }
+    }
+
+    /* Footer: string_count(4) + frame_count(4) + file_size(8) + checksum(16) */
+    file_offset_t footer_offset = FTELL64(writer->fp);
+    if (footer_offset < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+    /* Total file size once the 32-byte footer below has been appended. */
+    uint64_t file_size = (uint64_t)footer_offset + 32;
+    uint8_t footer[32] = {0};
+    /* Cast size_t to uint32_t before memcpy to ensure correct bytes are copied
+     * on both little-endian and big-endian systems (size_t is 8 bytes on 64-bit) */
+    uint32_t string_count_u32 = (uint32_t)writer->string_count;
+    uint32_t frame_count_u32 = (uint32_t)writer->frame_count;
+    /* NOTE(review): multi-byte fields are written in native byte order, so
+     * the file appears readable only on a machine of the same endianness --
+     * confirm the reader makes the same assumption. */
+    memcpy(footer + 0, &string_count_u32, 4);
+    memcpy(footer + 4, &frame_count_u32, 4);
+    memcpy(footer + 8, &file_size, 8);
+    /* bytes 16-31: checksum placeholder (zeros) */
+    if (fwrite_checked_allow_threads(footer, 32, writer->fp) < 0) {
+        return -1;
+    }
+
+    /* Seek back to the start and overwrite the header now that the final
+     * offsets and counts are known. */
+    if (FSEEK64(writer->fp, 0, SEEK_SET) < 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+
+    /* Convert file offsets and counts to fixed-width types for portable header format.
+     * This ensures correct behavior on both little-endian and big-endian systems. */
+    uint64_t string_table_offset_u64 = (uint64_t)string_table_offset;
+    uint64_t frame_table_offset_u64 = (uint64_t)frame_table_offset;
+    uint32_t thread_count_u32 = (uint32_t)writer->thread_count;
+    uint32_t compression_type_u32 = (uint32_t)writer->compression_type;
+
+    uint8_t header[FILE_HEADER_SIZE] = {0};
+    uint32_t magic = BINARY_FORMAT_MAGIC;
+    uint32_t version = BINARY_FORMAT_VERSION;
+    memcpy(header + HDR_OFF_MAGIC, &magic, HDR_SIZE_MAGIC);
+    memcpy(header + HDR_OFF_VERSION, &version, HDR_SIZE_VERSION);
+    header[HDR_OFF_PY_MAJOR] = PY_MAJOR_VERSION;
+    header[HDR_OFF_PY_MINOR] = PY_MINOR_VERSION;
+    header[HDR_OFF_PY_MICRO] = PY_MICRO_VERSION;
+    memcpy(header + HDR_OFF_START_TIME, &writer->start_time_us, HDR_SIZE_START_TIME);
+    memcpy(header + HDR_OFF_INTERVAL, &writer->sample_interval_us, HDR_SIZE_INTERVAL);
+    memcpy(header + HDR_OFF_SAMPLES, &writer->total_samples, HDR_SIZE_SAMPLES);
+    memcpy(header + HDR_OFF_THREADS, &thread_count_u32, HDR_SIZE_THREADS);
+    memcpy(header + HDR_OFF_STR_TABLE, &string_table_offset_u64, HDR_SIZE_STR_TABLE);
+    memcpy(header + HDR_OFF_FRAME_TABLE, &frame_table_offset_u64, HDR_SIZE_FRAME_TABLE);
+    memcpy(header + HDR_OFF_COMPRESSION, &compression_type_u32, HDR_SIZE_COMPRESSION);
+    if (fwrite_checked_allow_threads(header, FILE_HEADER_SIZE, writer->fp) < 0) {
+        return -1;
+    }
+
+    /* fp is cleared even when fclose() fails so a later
+     * binary_writer_destroy() cannot double-close it. */
+    if (fclose(writer->fp) != 0) {
+        writer->fp = NULL;
+        PyErr_SetFromErrno(PyExc_IOError);
+        return -1;
+    }
+    writer->fp = NULL;
+
+    return 0;
+}
+
+void
+binary_writer_destroy(BinaryWriter *writer)
+{
+    /* Free every resource owned by the writer.  Safe to call with NULL and
+     * after binary_writer_finalize() (fp is NULL by then).  If finalize()
+     * was never called, the file is closed as-is and its contents are left
+     * incomplete (no tables/footer/header rewrite). */
+    if (!writer) {
+        return;
+    }
+
+    if (writer->fp) {
+        fclose(writer->fp);
+    }
+
+    PyMem_Free(writer->filename);
+    PyMem_Free(writer->write_buffer);
+
+#ifdef HAVE_ZSTD
+    if (writer->zstd.cctx) {
+        ZSTD_freeCCtx(writer->zstd.cctx);
+    }
+    PyMem_Free(writer->zstd.compressed_buffer);
+#endif
+
+    if (writer->string_hash) {
+        _Py_hashtable_destroy(writer->string_hash);
+    }
+    if (writer->strings) {
+        /* Each interned string lives in its own allocation. */
+        for (size_t i = 0; i < writer->string_count; i++) {
+            PyMem_Free(writer->strings[i]);
+        }
+        PyMem_Free(writer->strings);
+    }
+    PyMem_Free(writer->string_lengths);
+
+    if (writer->frame_hash) {
+        _Py_hashtable_destroy(writer->frame_hash);
+    }
+    PyMem_Free(writer->frame_entries);
+
+    if (writer->thread_entries) {
+        for (size_t i = 0; i < writer->thread_count; i++) {
+            PyMem_Free(writer->thread_entries[i].prev_stack);
+            PyMem_Free(writer->thread_entries[i].pending_rle);
+        }
+        PyMem_Free(writer->thread_entries);
+    }
+
+    PyMem_Free(writer);
+}
+
# include "pycore_runtime.h" // _Py_ID()
#endif
#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION()
+#include "pycore_long.h" // _PyLong_UnsignedLongLong_Converter()
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
return return_value;
}
+/* NOTE(review): Argument Clinic GENERATED code for the BinaryWriter methods
+ * (docstrings, METHODDEF macros, and argument-parsing wrappers).  Do not
+ * hand-edit; regenerate with Tools/clinic so the end-of-file checksum stays
+ * valid. */
+PyDoc_STRVAR(_remote_debugging_BinaryWriter___init____doc__,
+"BinaryWriter(filename, sample_interval_us, start_time_us, *,\n"
+" compression=0)\n"
+"--\n"
+"\n"
+"High-performance binary writer for profiling data.\n"
+"\n"
+"Arguments:\n"
+" filename: Path to output file\n"
+" sample_interval_us: Sampling interval in microseconds\n"
+" start_time_us: Start timestamp in microseconds (from time.monotonic() * 1e6)\n"
+" compression: 0=none, 1=zstd (default: 0)\n"
+"\n"
+"Use as a context manager or call finalize() when done.");
+
+static int
+_remote_debugging_BinaryWriter___init___impl(BinaryWriterObject *self,
+                                             const char *filename,
+                                             unsigned long long sample_interval_us,
+                                             unsigned long long start_time_us,
+                                             int compression);
+
+static int
+_remote_debugging_BinaryWriter___init__(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    int return_value = -1;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 4
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(filename), &_Py_ID(sample_interval_us), &_Py_ID(start_time_us), &_Py_ID(compression), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"filename", "sample_interval_us", "start_time_us", "compression", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "BinaryWriter",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[4];
+    PyObject * const *fastargs;
+    Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+    Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 3;
+    const char *filename;
+    unsigned long long sample_interval_us;
+    unsigned long long start_time_us;
+    int compression = 0;
+
+    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+            /*minpos*/ 3, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!fastargs) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(fastargs[0])) {
+        _PyArg_BadArgument("BinaryWriter", "argument 'filename'", "str", fastargs[0]);
+        goto exit;
+    }
+    Py_ssize_t filename_length;
+    filename = PyUnicode_AsUTF8AndSize(fastargs[0], &filename_length);
+    if (filename == NULL) {
+        goto exit;
+    }
+    /* Reject paths with embedded NULs (UTF-8 length vs strlen mismatch). */
+    if (strlen(filename) != (size_t)filename_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    if (!_PyLong_UnsignedLongLong_Converter(fastargs[1], &sample_interval_us)) {
+        goto exit;
+    }
+    if (!_PyLong_UnsignedLongLong_Converter(fastargs[2], &start_time_us)) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    compression = PyLong_AsInt(fastargs[3]);
+    if (compression == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional_kwonly:
+    return_value = _remote_debugging_BinaryWriter___init___impl((BinaryWriterObject *)self, filename, sample_interval_us, start_time_us, compression);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter_write_sample__doc__,
+"write_sample($self, /, stack_frames, timestamp_us)\n"
+"--\n"
+"\n"
+"Write a sample to the binary file.\n"
+"\n"
+"Arguments:\n"
+" stack_frames: List of InterpreterInfo objects\n"
+" timestamp_us: Current timestamp in microseconds (from time.monotonic() * 1e6)");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER_WRITE_SAMPLE_METHODDEF \
+    {"write_sample", _PyCFunction_CAST(_remote_debugging_BinaryWriter_write_sample), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_BinaryWriter_write_sample__doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter_write_sample_impl(BinaryWriterObject *self,
+                                                 PyObject *stack_frames,
+                                                 unsigned long long timestamp_us);
+
+static PyObject *
+_remote_debugging_BinaryWriter_write_sample(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(stack_frames), &_Py_ID(timestamp_us), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"stack_frames", "timestamp_us", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "write_sample",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *stack_frames;
+    unsigned long long timestamp_us;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    stack_frames = args[0];
+    /* NOTE(review): source text here was mojibake ("×tamp_us");
+     * restored to the address-of expression clinic generates. */
+    if (!_PyLong_UnsignedLongLong_Converter(args[1], &timestamp_us)) {
+        goto exit;
+    }
+    return_value = _remote_debugging_BinaryWriter_write_sample_impl((BinaryWriterObject *)self, stack_frames, timestamp_us);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter_finalize__doc__,
+"finalize($self, /)\n"
+"--\n"
+"\n"
+"Finalize and close the binary file.\n"
+"\n"
+"Writes string/frame tables, footer, and updates header.");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER_FINALIZE_METHODDEF \
+    {"finalize", (PyCFunction)_remote_debugging_BinaryWriter_finalize, METH_NOARGS, _remote_debugging_BinaryWriter_finalize__doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter_finalize_impl(BinaryWriterObject *self);
+
+static PyObject *
+_remote_debugging_BinaryWriter_finalize(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryWriter_finalize_impl((BinaryWriterObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter_close__doc__,
+"close($self, /)\n"
+"--\n"
+"\n"
+"Close the writer without finalizing (discards data).");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER_CLOSE_METHODDEF \
+    {"close", (PyCFunction)_remote_debugging_BinaryWriter_close, METH_NOARGS, _remote_debugging_BinaryWriter_close__doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter_close_impl(BinaryWriterObject *self);
+
+static PyObject *
+_remote_debugging_BinaryWriter_close(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryWriter_close_impl((BinaryWriterObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter___enter____doc__,
+"__enter__($self, /)\n"
+"--\n"
+"\n"
+"Enter context manager.");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER___ENTER___METHODDEF \
+    {"__enter__", (PyCFunction)_remote_debugging_BinaryWriter___enter__, METH_NOARGS, _remote_debugging_BinaryWriter___enter____doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter___enter___impl(BinaryWriterObject *self);
+
+static PyObject *
+_remote_debugging_BinaryWriter___enter__(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryWriter___enter___impl((BinaryWriterObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter___exit____doc__,
+"__exit__($self, /, exc_type=None, exc_val=None, exc_tb=None)\n"
+"--\n"
+"\n"
+"Exit context manager, finalizing the file.");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER___EXIT___METHODDEF \
+    {"__exit__", _PyCFunction_CAST(_remote_debugging_BinaryWriter___exit__), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_BinaryWriter___exit____doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter___exit___impl(BinaryWriterObject *self,
+                                             PyObject *exc_type,
+                                             PyObject *exc_val,
+                                             PyObject *exc_tb);
+
+static PyObject *
+_remote_debugging_BinaryWriter___exit__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 3
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(exc_type), &_Py_ID(exc_val), &_Py_ID(exc_tb), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"exc_type", "exc_val", "exc_tb", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "__exit__",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[3];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+    PyObject *exc_type = Py_None;
+    PyObject *exc_val = Py_None;
+    PyObject *exc_tb = Py_None;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    if (args[0]) {
+        exc_type = args[0];
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    if (args[1]) {
+        exc_val = args[1];
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    exc_tb = args[2];
+skip_optional_pos:
+    return_value = _remote_debugging_BinaryWriter___exit___impl((BinaryWriterObject *)self, exc_type, exc_val, exc_tb);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryWriter_get_stats__doc__,
+"get_stats($self, /)\n"
+"--\n"
+"\n"
+"Get encoding statistics for the writer.\n"
+"\n"
+"Returns a dict with encoding statistics including repeat/full/suffix/pop-push\n"
+"record counts, frames written/saved, and compression ratio.");
+
+#define _REMOTE_DEBUGGING_BINARYWRITER_GET_STATS_METHODDEF \
+    {"get_stats", (PyCFunction)_remote_debugging_BinaryWriter_get_stats, METH_NOARGS, _remote_debugging_BinaryWriter_get_stats__doc__},
+
+static PyObject *
+_remote_debugging_BinaryWriter_get_stats_impl(BinaryWriterObject *self);
+
+static PyObject *
+_remote_debugging_BinaryWriter_get_stats(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryWriter_get_stats_impl((BinaryWriterObject *)self);
+}
+
+/* NOTE(review): Argument Clinic GENERATED code for the BinaryReader methods
+ * and module-level zstd_available().  Do not hand-edit; regenerate with
+ * Tools/clinic so the end-of-file checksum stays valid. */
+PyDoc_STRVAR(_remote_debugging_BinaryReader___init____doc__,
+"BinaryReader(filename)\n"
+"--\n"
+"\n"
+"High-performance binary reader for profiling data.\n"
+"\n"
+"Arguments:\n"
+" filename: Path to input file\n"
+"\n"
+"Use as a context manager or call close() when done.");
+
+static int
+_remote_debugging_BinaryReader___init___impl(BinaryReaderObject *self,
+                                             const char *filename);
+
+static int
+_remote_debugging_BinaryReader___init__(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    int return_value = -1;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(filename), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"filename", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "BinaryReader",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[1];
+    PyObject * const *fastargs;
+    Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+    const char *filename;
+
+    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!fastargs) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(fastargs[0])) {
+        _PyArg_BadArgument("BinaryReader", "argument 'filename'", "str", fastargs[0]);
+        goto exit;
+    }
+    Py_ssize_t filename_length;
+    filename = PyUnicode_AsUTF8AndSize(fastargs[0], &filename_length);
+    if (filename == NULL) {
+        goto exit;
+    }
+    /* Reject paths with embedded NULs (UTF-8 length vs strlen mismatch). */
+    if (strlen(filename) != (size_t)filename_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+    return_value = _remote_debugging_BinaryReader___init___impl((BinaryReaderObject *)self, filename);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader_replay__doc__,
+"replay($self, /, collector, progress_callback=None)\n"
+"--\n"
+"\n"
+"Replay samples through a collector.\n"
+"\n"
+"Arguments:\n"
+" collector: Collector object with collect() method\n"
+" progress_callback: Optional callable(current, total)\n"
+"\n"
+"Returns:\n"
+" Number of samples replayed");
+
+#define _REMOTE_DEBUGGING_BINARYREADER_REPLAY_METHODDEF \
+    {"replay", _PyCFunction_CAST(_remote_debugging_BinaryReader_replay), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_BinaryReader_replay__doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader_replay_impl(BinaryReaderObject *self,
+                                           PyObject *collector,
+                                           PyObject *progress_callback);
+
+static PyObject *
+_remote_debugging_BinaryReader_replay(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(collector), &_Py_ID(progress_callback), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"collector", "progress_callback", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "replay",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+    PyObject *collector;
+    PyObject *progress_callback = Py_None;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    collector = args[0];
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    progress_callback = args[1];
+skip_optional_pos:
+    return_value = _remote_debugging_BinaryReader_replay_impl((BinaryReaderObject *)self, collector, progress_callback);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader_get_info__doc__,
+"get_info($self, /)\n"
+"--\n"
+"\n"
+"Get metadata about the binary file.\n"
+"\n"
+"Returns:\n"
+" Dict with file metadata");
+
+#define _REMOTE_DEBUGGING_BINARYREADER_GET_INFO_METHODDEF \
+    {"get_info", (PyCFunction)_remote_debugging_BinaryReader_get_info, METH_NOARGS, _remote_debugging_BinaryReader_get_info__doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader_get_info_impl(BinaryReaderObject *self);
+
+static PyObject *
+_remote_debugging_BinaryReader_get_info(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryReader_get_info_impl((BinaryReaderObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader_get_stats__doc__,
+"get_stats($self, /)\n"
+"--\n"
+"\n"
+"Get reconstruction statistics from replay.\n"
+"\n"
+"Returns a dict with statistics about record types decoded and samples\n"
+"reconstructed during replay.");
+
+#define _REMOTE_DEBUGGING_BINARYREADER_GET_STATS_METHODDEF \
+    {"get_stats", (PyCFunction)_remote_debugging_BinaryReader_get_stats, METH_NOARGS, _remote_debugging_BinaryReader_get_stats__doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader_get_stats_impl(BinaryReaderObject *self);
+
+static PyObject *
+_remote_debugging_BinaryReader_get_stats(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryReader_get_stats_impl((BinaryReaderObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader_close__doc__,
+"close($self, /)\n"
+"--\n"
+"\n"
+"Close the reader and free resources.");
+
+#define _REMOTE_DEBUGGING_BINARYREADER_CLOSE_METHODDEF \
+    {"close", (PyCFunction)_remote_debugging_BinaryReader_close, METH_NOARGS, _remote_debugging_BinaryReader_close__doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader_close_impl(BinaryReaderObject *self);
+
+static PyObject *
+_remote_debugging_BinaryReader_close(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryReader_close_impl((BinaryReaderObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader___enter____doc__,
+"__enter__($self, /)\n"
+"--\n"
+"\n"
+"Enter context manager.");
+
+#define _REMOTE_DEBUGGING_BINARYREADER___ENTER___METHODDEF \
+    {"__enter__", (PyCFunction)_remote_debugging_BinaryReader___enter__, METH_NOARGS, _remote_debugging_BinaryReader___enter____doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader___enter___impl(BinaryReaderObject *self);
+
+static PyObject *
+_remote_debugging_BinaryReader___enter__(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_BinaryReader___enter___impl((BinaryReaderObject *)self);
+}
+
+PyDoc_STRVAR(_remote_debugging_BinaryReader___exit____doc__,
+"__exit__($self, /, exc_type=None, exc_val=None, exc_tb=None)\n"
+"--\n"
+"\n"
+"Exit context manager, closing the file.");
+
+#define _REMOTE_DEBUGGING_BINARYREADER___EXIT___METHODDEF \
+    {"__exit__", _PyCFunction_CAST(_remote_debugging_BinaryReader___exit__), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_BinaryReader___exit____doc__},
+
+static PyObject *
+_remote_debugging_BinaryReader___exit___impl(BinaryReaderObject *self,
+                                             PyObject *exc_type,
+                                             PyObject *exc_val,
+                                             PyObject *exc_tb);
+
+static PyObject *
+_remote_debugging_BinaryReader___exit__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 3
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(exc_type), &_Py_ID(exc_val), &_Py_ID(exc_tb), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else // !Py_BUILD_CORE
+    # define KWTUPLE NULL
+    #endif // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"exc_type", "exc_val", "exc_tb", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "__exit__",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[3];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+    PyObject *exc_type = Py_None;
+    PyObject *exc_val = Py_None;
+    PyObject *exc_tb = Py_None;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    if (args[0]) {
+        exc_type = args[0];
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    if (args[1]) {
+        exc_val = args[1];
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    exc_tb = args[2];
+skip_optional_pos:
+    return_value = _remote_debugging_BinaryReader___exit___impl((BinaryReaderObject *)self, exc_type, exc_val, exc_tb);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_remote_debugging_zstd_available__doc__,
+"zstd_available($module, /)\n"
+"--\n"
+"\n"
+"Check if zstd compression is available.\n"
+"\n"
+"Returns:\n"
+" True if zstd available, False otherwise");
+
+#define _REMOTE_DEBUGGING_ZSTD_AVAILABLE_METHODDEF \
+    {"zstd_available", (PyCFunction)_remote_debugging_zstd_available, METH_NOARGS, _remote_debugging_zstd_available__doc__},
+
+static PyObject *
+_remote_debugging_zstd_available_impl(PyObject *module);
+
+static PyObject *
+_remote_debugging_zstd_available(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return _remote_debugging_zstd_available_impl(module);
+}
+
PyDoc_STRVAR(_remote_debugging_get_child_pids__doc__,
"get_child_pids($module, /, pid, *, recursive=True)\n"
"--\n"
exit:
return return_value;
}
-/*[clinic end generated code: output=dc0550ad3d6a409c input=a9049054013a1b77]*/
+/*[clinic end generated code: output=036de0b06d0e34cc input=a9049054013a1b77]*/
******************************************************************************/
#include "_remote_debugging.h"
+#include "binary_io.h"
+
+/* Forward declarations for clinic-generated code */
+typedef struct {
+    PyObject_HEAD
+    BinaryWriter *writer;              /* Owned writer state; NULL once destroyed */
+    uint32_t cached_total_samples;     /* Preserved after finalize */
+} BinaryWriterObject;
+
+typedef struct {
+    PyObject_HEAD
+    BinaryReader *reader;              /* Owned reader state */
+} BinaryReaderObject;
+
#include "clinic/module.c.h"
/* ============================================================================
.slots = RemoteUnwinder_slots,
};
+/* Forward declarations for type specs defined later */
+static PyType_Spec BinaryWriter_spec;
+static PyType_Spec BinaryReader_spec;
+
/* ============================================================================
* MODULE INITIALIZATION
* ============================================================================ */
if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) {
return -1;
}
+
+ // Create BinaryWriter and BinaryReader types
+ CREATE_TYPE(m, st->BinaryWriter_Type, &BinaryWriter_spec);
+ if (PyModule_AddType(m, st->BinaryWriter_Type) < 0) {
+ return -1;
+ }
+
+ CREATE_TYPE(m, st->BinaryReader_Type, &BinaryReader_spec);
+ if (PyModule_AddType(m, st->BinaryReader_Type) < 0) {
+ return -1;
+ }
+
#ifdef Py_GIL_DISABLED
PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif
Py_VISIT(state->ThreadInfo_Type);
Py_VISIT(state->InterpreterInfo_Type);
Py_VISIT(state->AwaitedInfo_Type);
+ Py_VISIT(state->BinaryWriter_Type);
+ Py_VISIT(state->BinaryReader_Type);
return 0;
}
Py_CLEAR(state->ThreadInfo_Type);
Py_CLEAR(state->InterpreterInfo_Type);
Py_CLEAR(state->AwaitedInfo_Type);
+ Py_CLEAR(state->BinaryWriter_Type);
+ Py_CLEAR(state->BinaryReader_Type);
return 0;
}
(void)remote_debugging_clear((PyObject *)mod);
}
-static PyModuleDef_Slot remote_debugging_slots[] = {
- {Py_mod_exec, _remote_debugging_exec},
- {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
- {Py_mod_gil, Py_MOD_GIL_NOT_USED},
- {0, NULL},
+/* ============================================================================
+ * BINARY WRITER CLASS
+ * ============================================================================ */
+
+#define BinaryWriter_CAST(op) ((BinaryWriterObject *)(op))
+
+/*[clinic input]
+class _remote_debugging.BinaryWriter "BinaryWriterObject *" "&PyBinaryWriter_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e948838b90a2003c]*/
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.__init__
+ filename: str
+ sample_interval_us: unsigned_long_long
+ start_time_us: unsigned_long_long
+ *
+ compression: int = 0
+
+High-performance binary writer for profiling data.
+
+Arguments:
+ filename: Path to output file
+ sample_interval_us: Sampling interval in microseconds
+ start_time_us: Start timestamp in microseconds (from time.monotonic() * 1e6)
+ compression: 0=none, 1=zstd (default: 0)
+
+Use as a context manager or call finalize() when done.
+[clinic start generated code]*/
+
+static int
+_remote_debugging_BinaryWriter___init___impl(BinaryWriterObject *self,
+                                             const char *filename,
+                                             unsigned long long sample_interval_us,
+                                             unsigned long long start_time_us,
+                                             int compression)
+/*[clinic end generated code: output=014c0306f1bacf4b input=57497fe3cb9214a6]*/
+{
+    /* Support re-initialization: release any previous writer first and
+       clear the pointer so it cannot dangle if creation fails below. */
+    if (self->writer) {
+        binary_writer_destroy(self->writer);
+        self->writer = NULL;
+    }
+
+    /* Reset the cached counter so total_samples does not report a stale
+       value from a previously finalized writer. */
+    self->cached_total_samples = 0;
+
+    self->writer = binary_writer_create(filename, sample_interval_us, compression, start_time_us);
+    if (!self->writer) {
+        /* binary_writer_create presumably sets an exception — TODO confirm */
+        return -1;
+    }
+
+    return 0;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.write_sample
+ stack_frames: object
+ timestamp_us: unsigned_long_long
+
+Write a sample to the binary file.
+
+Arguments:
+ stack_frames: List of InterpreterInfo objects
+ timestamp_us: Current timestamp in microseconds (from time.monotonic() * 1e6)
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter_write_sample_impl(BinaryWriterObject *self,
+                                                 PyObject *stack_frames,
+                                                 unsigned long long timestamp_us)
+/*[clinic end generated code: output=24d5b86679b4128f input=dce3148417482624]*/
+{
+    /* Reject use after finalize()/close(), which set self->writer to NULL. */
+    if (!self->writer) {
+        PyErr_SetString(PyExc_ValueError, "Writer is closed");
+        return NULL;
+    }
+
+    /* A negative return signals failure; the helper is expected to have
+       set a Python exception — TODO confirm against binary_io. */
+    if (binary_writer_write_sample(self->writer, stack_frames, timestamp_us) < 0) {
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.finalize
+
+Finalize and close the binary file.
+
+Writes string/frame tables, footer, and updates header.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter_finalize_impl(BinaryWriterObject *self)
+/*[clinic end generated code: output=3534b88c6628de88 input=c02191750682f6a2]*/
+{
+    if (!self->writer) {
+        PyErr_SetString(PyExc_ValueError, "Writer is already closed");
+        return NULL;
+    }
+
+    /* Save total_samples before finalizing so the total_samples property
+       keeps working after the underlying writer is destroyed below. */
+    self->cached_total_samples = self->writer->total_samples;
+
+    if (binary_writer_finalize(self->writer) < 0) {
+        /* NOTE(review): the writer is intentionally left alive on this
+           error path; close() or dealloc will still free it. */
+        return NULL;
+    }
+
+    binary_writer_destroy(self->writer);
+    self->writer = NULL;
+
+    Py_RETURN_NONE;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.close
+
+Close the writer without finalizing (discards data).
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter_close_impl(BinaryWriterObject *self)
+/*[clinic end generated code: output=9571bb2256fd1fd2 input=6e0da206e60daf16]*/
+{
+    /* Idempotent: a second close() is a no-op.  Unlike finalize(), no
+       tables/footer are written, so unfinalized data is discarded. */
+    if (self->writer) {
+        binary_writer_destroy(self->writer);
+        self->writer = NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.__enter__
+
+Enter context manager.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter___enter___impl(BinaryWriterObject *self)
+/*[clinic end generated code: output=8eb95f61daf2d120 input=8ef14ee18da561d2]*/
+{
+    /* Context manager protocol: return a new reference to self. */
+    Py_INCREF(self);
+    return (PyObject *)self;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.__exit__
+ exc_type: object = None
+ exc_val: object = None
+ exc_tb: object = None
+
+Exit context manager, finalizing the file.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter___exit___impl(BinaryWriterObject *self,
+                                             PyObject *exc_type,
+                                             PyObject *exc_val,
+                                             PyObject *exc_tb)
+/*[clinic end generated code: output=61831f47c72a53c6 input=12334ce1009af37f]*/
+{
+    if (self->writer) {
+        /* Preserve the counter so the total_samples property keeps
+           working after the context manager exits.  This mirrors what
+           finalize() does; without it, reading total_samples after a
+           `with` block returned a stale value. */
+        self->cached_total_samples = self->writer->total_samples;
+
+        /* Only finalize on normal exit (no exception in the body). */
+        if (exc_type == Py_None) {
+            if (binary_writer_finalize(self->writer) < 0) {
+                binary_writer_destroy(self->writer);
+                self->writer = NULL;
+                return NULL;
+            }
+        }
+        binary_writer_destroy(self->writer);
+        self->writer = NULL;
+    }
+    /* Return False so an in-flight exception is never suppressed. */
+    Py_RETURN_FALSE;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryWriter.get_stats
+
+Get encoding statistics for the writer.
+
+Returns a dict with encoding statistics including repeat/full/suffix/pop-push
+record counts, frames written/saved, and compression ratio.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryWriter_get_stats_impl(BinaryWriterObject *self)
+/*[clinic end generated code: output=06522cd52544df89 input=82968491b53ad277]*/
+{
+    /* Stats live on the C writer, so they are unavailable once closed. */
+    if (!self->writer) {
+        PyErr_SetString(PyExc_ValueError, "Writer is closed");
+        return NULL;
+    }
+    return binary_writer_get_stats(self->writer);
+}
+
+/* Getter for the total_samples property: live count while the writer is
+   open, cached snapshot after finalize()/close(). */
+static PyObject *
+BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
+{
+    if (!self->writer) {
+        /* Use cached value after finalize/close */
+        return PyLong_FromUnsignedLong(self->cached_total_samples);
+    }
+    return PyLong_FromUnsignedLong(self->writer->total_samples);
+}
+
+/* Property table: total_samples is read-only. */
+static PyGetSetDef BinaryWriter_getset[] = {
+    {"total_samples", (getter)BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
+    {NULL}
+};
+
+/* Method table; entries are the clinic-generated METHODDEF macros. */
+static PyMethodDef BinaryWriter_methods[] = {
+    _REMOTE_DEBUGGING_BINARYWRITER_WRITE_SAMPLE_METHODDEF
+    _REMOTE_DEBUGGING_BINARYWRITER_FINALIZE_METHODDEF
+    _REMOTE_DEBUGGING_BINARYWRITER_CLOSE_METHODDEF
+    _REMOTE_DEBUGGING_BINARYWRITER___ENTER___METHODDEF
+    _REMOTE_DEBUGGING_BINARYWRITER___EXIT___METHODDEF
+    _REMOTE_DEBUGGING_BINARYWRITER_GET_STATS_METHODDEF
+    {NULL, NULL, 0, NULL}
+};
+
+/* Deallocator: frees the C writer (discarding unfinalized data) and then
+   releases the reference a heap-type instance holds on its type. */
+static void
+BinaryWriter_dealloc(PyObject *op)
+{
+    BinaryWriterObject *self = BinaryWriter_CAST(op);
+    PyTypeObject *tp = Py_TYPE(self);
+    if (self->writer) {
+        binary_writer_destroy(self->writer);
+    }
+    tp->tp_free(self);
+    Py_DECREF(tp);
+}
+
+/* Slot table wiring the handwritten functions into the heap type. */
+static PyType_Slot BinaryWriter_slots[] = {
+    {Py_tp_getset, BinaryWriter_getset},
+    {Py_tp_methods, BinaryWriter_methods},
+    {Py_tp_init, _remote_debugging_BinaryWriter___init__},
+    {Py_tp_dealloc, BinaryWriter_dealloc},
+    {0, NULL}
+};
+
+/* Type spec consumed by CREATE_TYPE during module exec. */
+static PyType_Spec BinaryWriter_spec = {
+    .name = "_remote_debugging.BinaryWriter",
+    .basicsize = sizeof(BinaryWriterObject),
+    .flags = (
+        Py_TPFLAGS_DEFAULT
+        | Py_TPFLAGS_IMMUTABLETYPE
+    ),
+    .slots = BinaryWriter_slots,
+};
+
+/* ============================================================================
+ * BINARY READER CLASS
+ * ============================================================================ */
+
+#define BinaryReader_CAST(op) ((BinaryReaderObject *)(op))
+
+/*[clinic input]
+class _remote_debugging.BinaryReader "BinaryReaderObject *" "&PyBinaryReader_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=36400aaf6f53216d]*/
+
+/*[clinic input]
+_remote_debugging.BinaryReader.__init__
+ filename: str
+
+High-performance binary reader for profiling data.
+
+Arguments:
+ filename: Path to input file
+
+Use as a context manager or call close() when done.
+[clinic start generated code]*/
+
+static int
+_remote_debugging_BinaryReader___init___impl(BinaryReaderObject *self,
+                                             const char *filename)
+/*[clinic end generated code: output=9699226f7ae052bb input=4201f9cc500ef2f6]*/
+{
+    /* Support re-initialization: release any previously opened reader. */
+    if (self->reader) {
+        binary_reader_close(self->reader);
+    }
+
+    self->reader = binary_reader_open(filename);
+    if (!self->reader) {
+        /* binary_reader_open presumably sets an exception — TODO confirm */
+        return -1;
+    }
+
+    return 0;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.replay
+ collector: object
+ progress_callback: object = None
+
+Replay samples through a collector.
+
+Arguments:
+ collector: Collector object with collect() method
+ progress_callback: Optional callable(current, total)
+
+Returns:
+ Number of samples replayed
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader_replay_impl(BinaryReaderObject *self,
+                                           PyObject *collector,
+                                           PyObject *progress_callback)
+/*[clinic end generated code: output=442345562574b61c input=ebb687aed3e0f4f1]*/
+{
+    /* Reject use after close(), which sets self->reader to NULL. */
+    if (!self->reader) {
+        PyErr_SetString(PyExc_ValueError, "Reader is closed");
+        return NULL;
+    }
+
+    /* Streams every decoded sample into collector; a negative return
+       signals an error with an exception presumably already set. */
+    Py_ssize_t replayed = binary_reader_replay(self->reader, collector, progress_callback);
+    if (replayed < 0) {
+        return NULL;
+    }
+
+    return PyLong_FromSsize_t(replayed);
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.get_info
+
+Get metadata about the binary file.
+
+Returns:
+ Dict with file metadata
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader_get_info_impl(BinaryReaderObject *self)
+/*[clinic end generated code: output=7f641fbd39147391 input=02e75e39c8a6cd1f]*/
+{
+    /* Metadata lives on the C reader, so it is unavailable once closed. */
+    if (!self->reader) {
+        PyErr_SetString(PyExc_ValueError, "Reader is closed");
+        return NULL;
+    }
+
+    return binary_reader_get_info(self->reader);
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.get_stats
+
+Get reconstruction statistics from replay.
+
+Returns a dict with statistics about record types decoded and samples
+reconstructed during replay.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader_get_stats_impl(BinaryReaderObject *self)
+/*[clinic end generated code: output=628b9ab5e4c4fd36 input=d8dd6654abd6c3c0]*/
+{
+    /* Stats are accumulated during replay(); raise once closed. */
+    if (!self->reader) {
+        PyErr_SetString(PyExc_ValueError, "Reader is closed");
+        return NULL;
+    }
+    return binary_reader_get_stats(self->reader);
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.close
+
+Close the reader and free resources.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader_close_impl(BinaryReaderObject *self)
+/*[clinic end generated code: output=ad0238cf5240b4f8 input=b919a66c737712d5]*/
+{
+    /* Idempotent: a second close() is a no-op. */
+    if (self->reader) {
+        binary_reader_close(self->reader);
+        self->reader = NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.__enter__
+
+Enter context manager.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader___enter___impl(BinaryReaderObject *self)
+/*[clinic end generated code: output=fade133538e93817 input=4794844c9efdc4f6]*/
+{
+    /* Context manager protocol: return a new reference to self. */
+    Py_INCREF(self);
+    return (PyObject *)self;
+}
+
+/*[clinic input]
+_remote_debugging.BinaryReader.__exit__
+ exc_type: object = None
+ exc_val: object = None
+ exc_tb: object = None
+
+Exit context manager, closing the file.
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_BinaryReader___exit___impl(BinaryReaderObject *self,
+                                             PyObject *exc_type,
+                                             PyObject *exc_val,
+                                             PyObject *exc_tb)
+/*[clinic end generated code: output=2acdd36cfdc14e4a input=87284243d7935835]*/
+{
+    /* Reading is side-effect free, so close unconditionally regardless
+       of whether an exception is propagating. */
+    if (self->reader) {
+        binary_reader_close(self->reader);
+        self->reader = NULL;
+    }
+    /* Return False so an in-flight exception is never suppressed. */
+    Py_RETURN_FALSE;
+}
+
+/* Getter for sample_count.  NOTE(review): returns 0 rather than raising
+   when the reader is closed — confirm this asymmetry with the writer's
+   property behavior is intended. */
+static PyObject *
+BinaryReader_get_sample_count(BinaryReaderObject *self, void *closure)
+{
+    if (!self->reader) {
+        return PyLong_FromLong(0);
+    }
+    return PyLong_FromUnsignedLong(self->reader->sample_count);
+}
+
+/* Getter for sample_interval_us; returns 0 once the reader is closed. */
+static PyObject *
+BinaryReader_get_sample_interval_us(BinaryReaderObject *self, void *closure)
+{
+    if (!self->reader) {
+        return PyLong_FromLong(0);
+    }
+    return PyLong_FromUnsignedLongLong(self->reader->sample_interval_us);
+}
+
+static PyGetSetDef BinaryReader_getset[] = {
+ {"sample_count", (getter)BinaryReader_get_sample_count, NULL, "Number of samples in file", NULL},
+ {"sample_interval_us", (getter)BinaryReader_get_sample_interval_us, NULL, "Sample interval in microseconds", NULL},
+ {NULL}
};
+static PyMethodDef BinaryReader_methods[] = {
+ _REMOTE_DEBUGGING_BINARYREADER_REPLAY_METHODDEF
+ _REMOTE_DEBUGGING_BINARYREADER_GET_INFO_METHODDEF
+ _REMOTE_DEBUGGING_BINARYREADER_GET_STATS_METHODDEF
+ _REMOTE_DEBUGGING_BINARYREADER_CLOSE_METHODDEF
+ _REMOTE_DEBUGGING_BINARYREADER___ENTER___METHODDEF
+ _REMOTE_DEBUGGING_BINARYREADER___EXIT___METHODDEF
+ {NULL, NULL, 0, NULL}
+};
+
+/* Deallocator: closes the C reader and then releases the reference a
+   heap-type instance holds on its type. */
+static void
+BinaryReader_dealloc(PyObject *op)
+{
+    BinaryReaderObject *self = BinaryReader_CAST(op);
+    PyTypeObject *tp = Py_TYPE(self);
+    if (self->reader) {
+        binary_reader_close(self->reader);
+    }
+    tp->tp_free(self);
+    Py_DECREF(tp);
+}
+
+/* Slot table wiring the handwritten functions into the heap type. */
+static PyType_Slot BinaryReader_slots[] = {
+    {Py_tp_getset, BinaryReader_getset},
+    {Py_tp_methods, BinaryReader_methods},
+    {Py_tp_init, _remote_debugging_BinaryReader___init__},
+    {Py_tp_dealloc, BinaryReader_dealloc},
+    {0, NULL}
+};
+
+/* Type spec consumed by CREATE_TYPE during module exec. */
+static PyType_Spec BinaryReader_spec = {
+    .name = "_remote_debugging.BinaryReader",
+    .basicsize = sizeof(BinaryReaderObject),
+    .flags = (
+        Py_TPFLAGS_DEFAULT
+        | Py_TPFLAGS_IMMUTABLETYPE
+    ),
+    .slots = BinaryReader_slots,
+};
+
+/* ============================================================================
+ * MODULE METHODS
+ * ============================================================================ */
+
+/*[clinic input]
+_remote_debugging.zstd_available
+
+Check if zstd compression is available.
+
+Returns:
+ True if zstd available, False otherwise
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_zstd_available_impl(PyObject *module)
+/*[clinic end generated code: output=55e35a70ef280cdd input=a1b4d41bc09c7cf9]*/
+{
+    /* Delegates to binary_io; availability is decided at build time by
+       the HAVE_ZSTD define wired up in configure. */
+    return PyBool_FromLong(binary_io_zstd_available());
+}
+
/* ============================================================================
* MODULE-LEVEL FUNCTIONS
* ============================================================================ */
}
static PyMethodDef remote_debugging_methods[] = {
+ _REMOTE_DEBUGGING_ZSTD_AVAILABLE_METHODDEF
_REMOTE_DEBUGGING_GET_CHILD_PIDS_METHODDEF
_REMOTE_DEBUGGING_IS_PYTHON_PROCESS_METHODDEF
{NULL, NULL, 0, NULL},
};
+/* Multi-phase init slots; per-interpreter GIL and free-threading safe. */
+static PyModuleDef_Slot remote_debugging_slots[] = {
+    {Py_mod_exec, _remote_debugging_exec},
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {0, NULL},
+};
+
static struct PyModuleDef remote_debugging_module = {
PyModuleDef_HEAD_INIT,
.m_name = "_remote_debugging",
<ClCompile Include="..\Modules\_remote_debugging\frame_cache.c" />
<ClCompile Include="..\Modules\_remote_debugging\threads.c" />
<ClCompile Include="..\Modules\_remote_debugging\asyncio.c" />
+ <ClCompile Include="..\Modules\_remote_debugging\binary_io_writer.c" />
+ <ClCompile Include="..\Modules\_remote_debugging\binary_io_reader.c" />
<ClCompile Include="..\Modules\_remote_debugging\subprocess.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Modules\_remote_debugging\_remote_debugging.h" />
+ <ClInclude Include="..\Modules\_remote_debugging\binary_io.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="..\PC\python_nt.rc" />
<ClCompile Include="..\Modules\_remote_debugging\asyncio.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Modules\_remote_debugging\binary_io_writer.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\Modules\_remote_debugging\binary_io_reader.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Modules\_remote_debugging\subprocess.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClInclude Include="..\Modules\_remote_debugging\_remote_debugging.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\Modules\_remote_debugging\binary_io.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="..\PC\python_nt.rc">
_abs('Modules/_hacl/*.c'): (200_000, 500),
_abs('Modules/posixmodule.c'): (20_000, 500),
_abs('Modules/termios.c'): (10_000, 800),
+ _abs('Modules/_remote_debugging/*.h'): (20_000, 1000),
_abs('Modules/_testcapimodule.c'): (20_000, 400),
_abs('Modules/expat/expat.h'): (10_000, 400),
_abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
HAVE_GETHOSTBYNAME_R_5_ARG
HAVE_GETHOSTBYNAME_R_6_ARG
LIBOBJS
+REMOTE_DEBUGGING_LIBS
+REMOTE_DEBUGGING_CFLAGS
LIBZSTD_LIBS
LIBZSTD_CFLAGS
LIBLZMA_LIBS
have_libzstd=yes
fi
+if test "x$have_libzstd" = xyes
+then :
+
+ REMOTE_DEBUGGING_CFLAGS="-DHAVE_ZSTD $LIBZSTD_CFLAGS"
+ REMOTE_DEBUGGING_LIBS="$LIBZSTD_LIBS"
+
+else case e in #(
+ e)
+ REMOTE_DEBUGGING_CFLAGS=""
+ REMOTE_DEBUGGING_LIBS=""
+ ;;
+esac
+fi
+
+
+
if test "x$py_cv_module__remote_debugging" = xyes
then :
-
-
+ as_fn_append MODULE_BLOCK "MODULE__REMOTE_DEBUGGING_CFLAGS=$REMOTE_DEBUGGING_CFLAGS$as_nl"
+ as_fn_append MODULE_BLOCK "MODULE__REMOTE_DEBUGGING_LDFLAGS=$REMOTE_DEBUGGING_LIBS$as_nl"
fi
])
])
+dnl _remote_debugging module: optional zstd compression support
+dnl The module always builds, but zstd compression is only available when libzstd is found
+AS_VAR_IF([have_libzstd], [yes], [
+ REMOTE_DEBUGGING_CFLAGS="-DHAVE_ZSTD $LIBZSTD_CFLAGS"
+ REMOTE_DEBUGGING_LIBS="$LIBZSTD_LIBS"
+], [
+ REMOTE_DEBUGGING_CFLAGS=""
+ REMOTE_DEBUGGING_LIBS=""
+])
+AC_SUBST([REMOTE_DEBUGGING_CFLAGS])
+AC_SUBST([REMOTE_DEBUGGING_LIBS])
+
dnl PY_CHECK_NETDB_FUNC(FUNCTION)
AC_DEFUN([PY_CHECK_NETDB_FUNC], [PY_CHECK_FUNC([$1], [@%:@include <netdb.h>])])
PY_STDLIB_MOD_SIMPLE([_posixsubprocess])
PY_STDLIB_MOD_SIMPLE([_queue])
PY_STDLIB_MOD_SIMPLE([_random])
-PY_STDLIB_MOD_SIMPLE([_remote_debugging])
+PY_STDLIB_MOD_SIMPLE([_remote_debugging], [$REMOTE_DEBUGGING_CFLAGS], [$REMOTE_DEBUGGING_LIBS])
PY_STDLIB_MOD_SIMPLE([select])
PY_STDLIB_MOD_SIMPLE([_struct])
PY_STDLIB_MOD_SIMPLE([_types])