def create_binary_file(self, samples, interval=1000, compression="none"):
"""Create a test binary file and track it for cleanup."""
+ filename, _ = self.write_binary_file(samples, interval, compression)
+ return filename
+
+ def write_binary_file(self, samples, interval=1000, compression="none"):
+ """Like create_binary_file but also returns the writer collector."""
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
filename = f.name
self.temp_files.append(filename)
for sample in samples:
collector.collect(sample)
collector.export(None)
- return filename
+ return filename, collector
def roundtrip(self, samples, interval=1000, compression="none"):
"""Write samples to binary and read back."""
with BinaryReader("/nonexistent/path/file.bin") as reader:
reader.replay_samples(RawCollector())
+ def test_writer_handles_empty_stack_first_sample(self):
+ """BinaryWriter.write_sample tolerates an empty stack on a fresh thread.
+
+ Regression test for the C-level RLE bug in process_thread_sample: a
+ freshly-created ThreadEntry has prev_stack_depth == 0, so an empty
+ curr_stack compares as STACK_REPEAT against the zero-initialized
+ previous stack. Before the fix, this fell through the
+ `&& !is_new_thread` guard into write_sample_with_encoding, which had
+ no handler for STACK_REPEAT and raised
+ RuntimeError("Invalid stack encoding type"). Goes through
+ BinaryWriter.write_sample directly so the test cannot be masked by
+ any Python-level filtering.
+ """
+ with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+ filename = f.name
+ self.temp_files.append(filename)
+
+ writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+ empty_sample = [
+ make_interpreter(
+ 0, [make_thread(99, [], status=THREAD_STATUS_UNKNOWN)]
+ )
+ ]
+ # First sample for a fresh thread has empty frame_info — the exact
+ # scenario that exposes the bug.
+ writer.write_sample(empty_sample, 1000)
+ writer.write_sample(empty_sample, 2000)
+ # Mix in a real sample to exercise the transition out of the
+ # empty-stack RLE buffer.
+ real_sample = [
+ make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+ ]
+ writer.write_sample(real_sample, 3000)
+ writer.finalize()
+
+ reader_collector = RawCollector()
+ with BinaryReader(filename) as reader:
+ count = reader.replay_samples(reader_collector)
+ # Empty-stack samples are recorded as STACK_REPEAT records with
+ # depth-0 stacks; the file must replay all three samples.
+ self.assertEqual(count, 3)
+
+ def test_writer_handles_mixed_empty_and_real_first_sample(self):
+ """First sample with one empty + one real thread roundtrips through C."""
+ with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+ filename = f.name
+ self.temp_files.append(filename)
+
+ writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+ sample = [
+ make_interpreter(
+ 0,
+ [
+ make_thread(1, [make_frame("a.py", 1, "f")]),
+ make_thread(99, [], status=THREAD_STATUS_UNKNOWN),
+ ],
+ )
+ ]
+ # Two samples so RLE state is exercised.
+ writer.write_sample(sample, 1000)
+ writer.write_sample(sample, 2000)
+ writer.finalize()
+
+ # Replay must succeed without raising RuntimeError, and the real
+ # thread's frames must round-trip.
+ reader_collector = RawCollector()
+ with BinaryReader(filename) as reader:
+ reader.replay_samples(reader_collector)
+ self.assertIn((0, 1), reader_collector.by_thread)
+ self.assertEqual(len(reader_collector.by_thread[(0, 1)]), 2)
+
+ def test_writer_total_samples_after_finalize_matches_reader(self):
+ """BinaryWriter.total_samples after finalize() matches the reader's count."""
+ # Five IDENTICAL samples force every sample beyond the first into the
+ # per-thread RLE buffer. Regression for the cached_total_samples
+ # ordering bug: capturing the cache BEFORE binary_writer_finalize()
+ # missed the buffered samples that flush_pending_rle() counts. Keep
+ # the samples identical to preserve coverage. See gh-149342.
+ samples = [
+ [make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])]
+ ] * 5
+ filename, writer_collector = self.write_binary_file(samples)
+ reader_collector = RawCollector()
+ with BinaryReader(filename) as reader:
+ replayed = reader.replay_samples(reader_collector)
+ self.assertEqual(writer_collector.total_samples, len(samples))
+ self.assertEqual(writer_collector.total_samples, replayed)
+
+ def test_writer_total_samples_after_context_manager_matches_reader(self):
+ """total_samples after `with BinaryWriter(...)` matches the reader's count.
+
+ Regression for the asymmetry between finalize() and __exit__ in
+ module.c: __exit__ also calls binary_writer_finalize and must
+ preserve cached_total_samples like finalize() does, otherwise the
+ getter returns 0 once self->writer is NULL.
+ """
+ with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+ filename = f.name
+ self.temp_files.append(filename)
+
+ sample = [
+ make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+ ]
+ with _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) as w:
+ for i in range(5):
+ w.write_sample(sample, i * 1000)
+ self.assertEqual(w.total_samples, 5)
+
+ reader_collector = RawCollector()
+ with BinaryReader(filename) as reader:
+ self.assertEqual(reader.replay_samples(reader_collector), 5)
+
+ def test_writer_total_samples_after_close_returns_zero(self):
+ """close() discards data; total_samples reflects no cached count."""
+ with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+ filename = f.name
+ self.temp_files.append(filename)
+
+ w = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+ sample = [
+ make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+ ]
+ for i in range(5):
+ w.write_sample(sample, i * 1000)
+ w.close()
+ self.assertEqual(w.total_samples, 0)
+
class TestBinaryEncodings(BinaryFormatTestBase):
"""Tests specifically targeting different stack encodings."""
entry->prev_stack_capacity = MAX_STACK_DEPTH;
entry->pending_rle_capacity = INITIAL_RLE_CAPACITY;
- entry->prev_stack = PyMem_Malloc(entry->prev_stack_capacity * sizeof(uint32_t));
+ entry->prev_stack = PyMem_Calloc(entry->prev_stack_capacity, sizeof(uint32_t));
if (!entry->prev_stack) {
PyErr_NoMemory();
return NULL;
}
uint8_t status = (uint8_t)status_long;
- int is_new_thread = 0;
ThreadEntry *entry = writer_get_or_create_thread_entry(
- writer, thread_id, interpreter_id, &is_new_thread);
+ writer, thread_id, interpreter_id, NULL);
if (!entry) {
return -1;
}
curr_stack, curr_depth,
&shared_count, &pop_count, &push_count);
- if (encoding == STACK_REPEAT && !is_new_thread) {
- /* Buffer this sample for RLE */
+ if (encoding == STACK_REPEAT) {
+ /* Buffer this sample for RLE.
+ *
+ * STACK_REPEAT also covers the "first sample for a fresh thread,
+ * empty stack" case: a new ThreadEntry has prev_stack_depth == 0
+ * and a zero-initialized prev_stack, so compare_stacks() returns
+ * STACK_REPEAT against an empty curr_stack (depth 0). Buffering
+ * it here is correct; the RLE flush path emits it as a normal
+ * STACK_REPEAT record. */
if (GROW_ARRAY(entry->pending_rle, entry->pending_rle_count,
entry->pending_rle_capacity, PendingRLESample) < 0) {
return -1;
Py_RETURN_NONE;
}
/* Finalize the writer, cache total_samples, and destroy it.
 *
 * The cache assignment must happen AFTER binary_writer_finalize(): finalize
 * flushes pending RLE samples via flush_pending_rle(), which increments
 * writer->total_samples for each one. Caching before finalize would lose
 * those trailing samples.
 *
 * Returns 0 on success with self->writer destroyed and set to NULL.
 * Returns -1 on failure WITHOUT destroying self->writer — the callers
 * (finalize() and __exit__) make different cleanup choices on error. */
static int
binary_writer_finalize_and_cache(BinaryWriterObject *self)
{
    if (binary_writer_finalize(self->writer) < 0) {
        /* Leave self->writer alive; the caller owns error cleanup. */
        return -1;
    }
    self->cached_total_samples = self->writer->total_samples;
    binary_writer_destroy(self->writer);
    self->writer = NULL;
    return 0;
}
+
/*[clinic input]
_remote_debugging.BinaryWriter.finalize
return NULL;
}
- /* Save total_samples before finalizing */
- self->cached_total_samples = self->writer->total_samples;
-
- if (binary_writer_finalize(self->writer) < 0) {
+ if (binary_writer_finalize_and_cache(self) < 0) {
return NULL;
}
- binary_writer_destroy(self->writer);
- self->writer = NULL;
-
Py_RETURN_NONE;
}
if (self->writer) {
/* Only finalize on normal exit (no exception) */
if (exc_type == Py_None) {
- if (binary_writer_finalize(self->writer) < 0) {
- binary_writer_destroy(self->writer);
- self->writer = NULL;
+ if (binary_writer_finalize_and_cache(self) < 0) {
+ if (self->writer) {
+ binary_writer_destroy(self->writer);
+ self->writer = NULL;
+ }
return NULL;
}
}
- binary_writer_destroy(self->writer);
- self->writer = NULL;
+ else {
+ binary_writer_destroy(self->writer);
+ self->writer = NULL;
+ }
}
Py_RETURN_FALSE;
}
}
static PyObject *
-BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
+BinaryWriter_get_total_samples(PyObject *op, void *closure)
{
+ BinaryWriterObject *self = BinaryWriter_CAST(op);
if (!self->writer) {
/* Use cached value after finalize/close */
return PyLong_FromUnsignedLong(self->cached_total_samples);
}
static PyGetSetDef BinaryWriter_getset[] = {
- {"total_samples", (getter)BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
+ {"total_samples", BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
{NULL}
};