From ea51e745c713e08ea76620c5b42105dbf6952d8e Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 30 Nov 2025 01:42:39 +0000 Subject: [PATCH] gh-138122: Add thread status statistics to flamegraph profiler (#141900) Co-authored-by: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com> --- Lib/profiling/sampling/collector.py | 81 ++++- Lib/profiling/sampling/flamegraph.css | 175 +++++++++- Lib/profiling/sampling/flamegraph.js | 93 ++++- .../sampling/flamegraph_template.html | 24 ++ Lib/profiling/sampling/sample.py | 3 +- Lib/profiling/sampling/stack_collector.py | 88 ++++- .../test_sampling_profiler/test_collectors.py | 329 +++++++++++++++++- ...-11-24-14-05-52.gh-issue-138122.2bbGA8.rst | 5 + 8 files changed, 777 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 27d40156d1f6..6187f351cb59 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -19,7 +19,6 @@ class Collector(ABC): """Export collected data to a file.""" def _iter_all_frames(self, stack_frames, skip_idle=False): - """Iterate over all frame stacks from all interpreters and threads.""" for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: # skip_idle now means: skip if thread is not actively running @@ -33,3 +32,83 @@ class Collector(ABC): frames = thread_info.frame_info if frames: yield frames, thread_info.thread_id + + def _is_gc_frame(self, frame): + if isinstance(frame, tuple): + funcname = frame[2] if len(frame) >= 3 else "" + else: + funcname = getattr(frame, "funcname", "") + + return "" in funcname or "gc_collect" in funcname + + def _collect_thread_status_stats(self, stack_frames): + """Collect aggregate and per-thread status statistics from a sample. + + Returns: + tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats) + - aggregate_status_counts: dict with has_gil, on_cpu, etc. 
+ - has_gc_frame: bool indicating if any thread has GC frames + - per_thread_stats: dict mapping thread_id to per-thread counts + """ + status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + has_gc_frame = False + per_thread_stats = {} + + for interpreter_info in stack_frames: + threads = getattr(interpreter_info, "threads", []) + for thread_info in threads: + status_counts["total"] += 1 + + # Track thread status using bit flags + status_flags = getattr(thread_info, "status", 0) + + if status_flags & THREAD_STATUS_HAS_GIL: + status_counts["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + status_counts["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + status_counts["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + status_counts["unknown"] += 1 + + # Track per-thread statistics + thread_id = getattr(thread_info, "thread_id", None) + if thread_id is not None: + if thread_id not in per_thread_stats: + per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + + thread_stats = per_thread_stats[thread_id] + thread_stats["total"] += 1 + + if status_flags & THREAD_STATUS_HAS_GIL: + thread_stats["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + thread_stats["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + thread_stats["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + thread_stats["unknown"] += 1 + + # Check for GC frames in this thread + frames = getattr(thread_info, "frame_info", None) + if frames: + for frame in frames: + if self._is_gc_frame(frame): + thread_stats["gc_samples"] += 1 + has_gc_frame = True + break + + return status_counts, has_gc_frame, per_thread_stats diff --git a/Lib/profiling/sampling/flamegraph.css b/Lib/profiling/sampling/flamegraph.css index 67754ca609aa..0a6fde2ad329 100644 --- a/Lib/profiling/sampling/flamegraph.css +++ b/Lib/profiling/sampling/flamegraph.css @@ -108,6 +108,143 @@ body { gap: 20px; } +/* Compact Thread Stats Bar - Colorful Square Design */ +.thread-stats-bar { + background: rgba(255, 255, 255, 0.95); + padding: 12px 24px; + display: flex; + align-items: center; + justify-content: center; + gap: 16px; + font-size: 13px; + box-shadow: 0 2px 8px rgba(55, 118, 171, 0.2); +} + +.thread-stat-item { + display: inline-flex; + align-items: center; + gap: 8px; + background: white; + padding: 6px 14px; + border-radius: 4px; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08); + transition: all 0.3s ease; + border: 2px solid; + min-width: 115px; + justify-content: center; + animation: fadeIn 0.5s ease-out backwards; +} + +.thread-stat-item:nth-child(1) { animation-delay: 0s; } +.thread-stat-item:nth-child(3) { animation-delay: 0.1s; } +.thread-stat-item:nth-child(5) { animation-delay: 0.2s; } +.thread-stat-item:nth-child(7) { animation-delay: 0.3s; } + +@keyframes fadeIn { + from { + opacity: 0; + } + to { + opacity: 1; + } +} + +@keyframes slideUp { + from { + opacity: 0; + transform: translateY(15px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@keyframes gentlePulse { + 0%, 100% { box-shadow: 0 2px 8px rgba(55, 118, 171, 0.15); } + 50% { box-shadow: 0 2px 16px rgba(55, 118, 171, 0.4); } +} + +/* Color-coded borders and subtle glow on hover */ +#gil-held-stat { + --stat-color: 40, 167, 69; + border-color: rgb(var(--stat-color)); + background: linear-gradient(135deg, rgba(var(--stat-color), 0.06) 0%, #ffffff 100%); +} + +#gil-released-stat 
{ + --stat-color: 220, 53, 69; + border-color: rgb(var(--stat-color)); + background: linear-gradient(135deg, rgba(var(--stat-color), 0.06) 0%, #ffffff 100%); +} + +#gil-waiting-stat { + --stat-color: 255, 193, 7; + border-color: rgb(var(--stat-color)); + background: linear-gradient(135deg, rgba(var(--stat-color), 0.06) 0%, #ffffff 100%); +} + +#gc-stat { + --stat-color: 111, 66, 193; + border-color: rgb(var(--stat-color)); + background: linear-gradient(135deg, rgba(var(--stat-color), 0.06) 0%, #ffffff 100%); +} + +#gil-held-stat:hover, +#gil-released-stat:hover, +#gil-waiting-stat:hover, +#gc-stat:hover { + box-shadow: 0 0 12px rgba(var(--stat-color), 0.4), 0 1px 3px rgba(0, 0, 0, 0.08); +} + +.thread-stat-item .stat-label { + color: #5a6c7d; + font-weight: 600; + font-size: 11px; + letter-spacing: 0.3px; +} + +.thread-stat-item .stat-value { + color: #2e3338; + font-weight: 800; + font-size: 14px; + font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; +} + +.thread-stat-separator { + color: rgba(0, 0, 0, 0.15); + font-weight: 300; + font-size: 16px; + position: relative; + z-index: 1; +} + +/* Responsive - stack on small screens */ +@media (max-width: 768px) { + .thread-stats-bar { + flex-wrap: wrap; + gap: 8px; + font-size: 11px; + padding: 10px 16px; + } + + .thread-stat-item { + padding: 4px 10px; + } + + .thread-stat-item .stat-label { + font-size: 11px; + } + + .thread-stat-item .stat-value { + font-size: 12px; + } + + .thread-stat-separator { + display: none; + } +} + .stat-card { background: #ffffff; border: 1px solid #e9ecef; @@ -119,8 +256,13 @@ body { box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06); transition: all 0.2s ease; min-height: 120px; + animation: slideUp 0.4s ease-out backwards; } +.stat-card:nth-child(1) { animation-delay: 0.1s; } +.stat-card:nth-child(2) { animation-delay: 0.2s; } +.stat-card:nth-child(3) { animation-delay: 0.3s; } + .stat-card:hover { box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1); transform: translateY(-2px); @@ -218,6 +360,11 @@ body { box-shadow: 0 4px 8px rgba(55, 118, 171, 0.3); } +.controls button:active { + transform: translateY(1px); + box-shadow: 0 1px 2px rgba(55, 118, 171, 0.2); +} + .controls button.secondary { background: #ffd43b; color: #2e3338; @@ -227,6 +374,10 @@ body { background: #ffcd02; } +.controls button.secondary:active { + background: #e6b800; +} + .thread-filter-wrapper { display: none; align-items: center; @@ -368,11 +519,14 @@ body { display: flex; align-items: center; justify-content: center; - transition: background 0.2s; + transition: background 0.2s, transform 0.2s; + animation: gentlePulse 3s ease-in-out infinite; } #show-info-btn:hover { background: #2d5aa0; + animation: none; + transform: scale(1.05); } #close-info-btn { @@ -486,3 +640,22 @@ body { font-size: 12px !important; } } + +/* Accessibility: visible focus states */ +button:focus-visible, +select:focus-visible, +input:focus-visible { + outline: 2px solid #ffd43b; + outline-offset: 2px; +} + +/* Smooth panel transitions */ +.legend-panel, +.info-panel { + transition: opacity 0.2s ease, transform 0.2s ease; +} + +.legend-panel[style*="block"], +.info-panel[style*="block"] { + animation: slideUp 0.2s ease-out; +} diff --git a/Lib/profiling/sampling/flamegraph.js b/Lib/profiling/sampling/flamegraph.js index 670ca22d442e..7faac0effbc5 100644 --- a/Lib/profiling/sampling/flamegraph.js +++ b/Lib/profiling/sampling/flamegraph.js @@ -401,9 +401,93 @@ if (document.readyState === "loading") { initFlamegraph(); } +// Mode constants (must match constants.py) +const 
PROFILING_MODE_WALL = 0; +const PROFILING_MODE_CPU = 1; +const PROFILING_MODE_GIL = 2; +const PROFILING_MODE_ALL = 3; + +function populateThreadStats(data, selectedThreadId = null) { + // Check if thread statistics are available + const stats = data?.stats; + if (!stats || !stats.thread_stats) { + return; // No thread stats available + } + + const mode = stats.mode !== undefined ? stats.mode : PROFILING_MODE_WALL; + let threadStats; + + // If a specific thread is selected, use per-thread stats + if (selectedThreadId !== null && stats.per_thread_stats && stats.per_thread_stats[selectedThreadId]) { + threadStats = stats.per_thread_stats[selectedThreadId]; + } else { + threadStats = stats.thread_stats; + } + + // Validate threadStats object + if (!threadStats || typeof threadStats.total !== 'number') { + return; // Invalid thread stats + } + + const bar = document.getElementById('thread-stats-bar'); + if (!bar) { + return; // DOM element not found + } + + // Show the bar if we have valid thread stats + if (threadStats.total > 0) { + bar.style.display = 'flex'; + + // Hide/show GIL stats items in GIL mode + const gilHeldStat = document.getElementById('gil-held-stat'); + const gilReleasedStat = document.getElementById('gil-released-stat'); + const gilWaitingStat = document.getElementById('gil-waiting-stat'); + const separators = bar.querySelectorAll('.thread-stat-separator'); + + if (mode === PROFILING_MODE_GIL) { + // In GIL mode, hide GIL-related stats + if (gilHeldStat) gilHeldStat.style.display = 'none'; + if (gilReleasedStat) gilReleasedStat.style.display = 'none'; + if (gilWaitingStat) gilWaitingStat.style.display = 'none'; + separators.forEach((sep, i) => { + if (i < 3) sep.style.display = 'none'; + }); + } else { + // Show all stats in other modes + if (gilHeldStat) gilHeldStat.style.display = 'inline-flex'; + if (gilReleasedStat) gilReleasedStat.style.display = 'inline-flex'; + if (gilWaitingStat) gilWaitingStat.style.display = 'inline-flex'; + separators.forEach(sep => sep.style.display = 'inline'); + + // GIL Held + const gilHeldPct = threadStats.has_gil_pct || 0; + const gilHeldPctElem = document.getElementById('gil-held-pct'); + if (gilHeldPctElem) gilHeldPctElem.textContent = `${gilHeldPct.toFixed(2)}%`; + + // GIL Released (threads running without GIL) + const gilReleasedPct = threadStats.on_cpu_pct || 0; + const gilReleasedPctElem = document.getElementById('gil-released-pct'); + if (gilReleasedPctElem) gilReleasedPctElem.textContent = `${gilReleasedPct.toFixed(2)}%`; + + // Waiting for GIL + const gilWaitingPct = threadStats.gil_requested_pct || 0; + const gilWaitingPctElem = document.getElementById('gil-waiting-pct'); + if (gilWaitingPctElem) gilWaitingPctElem.textContent = `${gilWaitingPct.toFixed(2)}%`; + } + + // Garbage Collection (always show) + const gcPct = threadStats.gc_pct || 0; + const gcPctElem = document.getElementById('gc-pct'); + if (gcPctElem) gcPctElem.textContent = `${gcPct.toFixed(2)}%`; + } +} + function populateStats(data) { const totalSamples = data.value || 0; + // Populate thread statistics if available + populateThreadStats(data); + // Collect all functions with their metrics, aggregated by function name const functionMap = new Map(); @@ -579,13 +663,15 @@ function filterByThread() { currentThreadFilter = selectedThread; let filteredData; + let selectedThreadId = null; + if (selectedThread === 'all') { // Show all data filteredData = originalData; } else { // Filter data by thread - const threadId = parseInt(selectedThread); - filteredData = 
filterDataByThread(originalData, threadId); + selectedThreadId = parseInt(selectedThread); + filteredData = filterDataByThread(originalData, selectedThreadId); if (filteredData.strings) { stringTable = filteredData.strings; @@ -597,6 +683,9 @@ function filterByThread() { const tooltip = createPythonTooltip(filteredData); const chart = createFlamegraph(tooltip, filteredData.value); renderFlamegraph(chart, filteredData); + + // Update thread stats to show per-thread or aggregate stats + populateThreadStats(originalData, selectedThreadId); } function filterDataByThread(data, threadId) { diff --git a/Lib/profiling/sampling/flamegraph_template.html b/Lib/profiling/sampling/flamegraph_template.html index 585a1abb61f8..5f94bbe69c4f 100644 --- a/Lib/profiling/sampling/flamegraph_template.html +++ b/Lib/profiling/sampling/flamegraph_template.html @@ -26,7 +26,31 @@ + + +
+          <!-- Thread status bar. NOTE: the original markup of this hunk was lost in
+               extraction; the block below is an approximate reconstruction based on
+               the element ids and classes referenced by flamegraph.js and flamegraph.css. -->
+          <div id="thread-stats-bar" class="thread-stats-bar" style="display: none;">
+              <div id="gil-held-stat" class="thread-stat-item">
+                  <span class="stat-label">GIL HELD</span>
+                  <span id="gil-held-pct" class="stat-value">0.00%</span>
+              </div>
+              <span class="thread-stat-separator">|</span>
+              <div id="gil-released-stat" class="thread-stat-item">
+                  <span class="stat-label">GIL RELEASED</span>
+                  <span id="gil-released-pct" class="stat-value">0.00%</span>
+              </div>
+              <span class="thread-stat-separator">|</span>
+              <div id="gil-waiting-stat" class="thread-stat-item">
+                  <span class="stat-label">WAITING FOR GIL</span>
+                  <span id="gil-waiting-pct" class="stat-value">0.00%</span>
+              </div>
+              <span class="thread-stat-separator">|</span>
+              <div id="gc-stat" class="thread-stat-item">
+                  <span class="stat-label">GC</span>
+                  <span id="gc-pct" class="stat-value">0.00%</span>
+              </div>
+          </div>
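
Reviewer note: the stats object that populateThreadStats() reads is assembled on the
Python side by set_stats() and _convert_to_flamegraph_format(). Below is a minimal
sketch of its shape; the field names come from those two methods, while the numbers
are purely illustrative (chosen to be arithmetically consistent, e.g.
has_gil_pct == has_gil / total * 100) and the thread id is hypothetical.

# Illustrative shape of data["stats"] as consumed by populateThreadStats().
stats = {
    "sample_interval_usec": 1000,
    "duration_sec": 2.0,
    "sample_rate": 1000.0,
    "error_rate": 0.0,
    "mode": 0,                      # PROFILING_MODE_WALL
    "thread_stats": {
        # aggregate counters across all samples and threads
        "has_gil": 1200, "on_cpu": 800, "gil_requested": 150,
        "unknown": 0, "total": 2000,
        "has_gil_pct": 60.0, "on_cpu_pct": 40.0,
        "gil_requested_pct": 7.5, "gc_pct": 3.2,
    },
    "per_thread_stats": {
        140001: {                   # one entry per sampled thread id (hypothetical id)
            "has_gil": 700, "on_cpu": 300, "gil_requested": 0,
            "unknown": 0, "total": 1000, "gc_samples": 32,
            "has_gil_pct": 70.0, "on_cpu_pct": 30.0,
            "gil_requested_pct": 0.0, "gc_pct": 3.2,
        },
    },
}
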
diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index f3fa441a35f4..bcc24319aab0 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -31,6 +31,7 @@ class SampleProfiler: self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads + self.mode = mode # Store mode for later use if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, @@ -117,7 +118,7 @@ class SampleProfiler: # Pass stats to flamegraph collector if it's the right type if hasattr(collector, 'set_stats'): - collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate) + collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, mode=self.mode) expected_samples = int(duration_sec / sample_interval_sec) if num_samples < expected_samples and not is_live_mode: diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 51d13a648bfa..9028a8bebb19 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -62,17 +62,65 @@ class FlamegraphCollector(StackTraceCollector): self.stats = {} self._root = {"samples": 0, "children": {}, "threads": set()} self._total_samples = 0 + self._sample_count = 0 # Track actual number of samples (not thread traces) self._func_intern = {} self._string_table = StringTable() self._all_threads = set() - def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None): + # Thread status statistics (similar to LiveStatsCollector) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + self.samples_with_gc_frames = 0 + + # Per-thread statistics + self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}} + + def collect(self, stack_frames, skip_idle=False): + """Override to track thread status statistics before processing frames.""" + # Increment sample count once per sample + self._sample_count += 1 + + # Collect both aggregate and per-thread statistics using base method + status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames) + + # Merge aggregate status counts + for key in status_counts: + self.thread_status_counts[key] += status_counts[key] + + # Update aggregate GC frame count + if has_gc_frame: + self.samples_with_gc_frames += 1 + + # Merge per-thread statistics + for thread_id, stats in per_thread_stats.items(): + if thread_id not in self.per_thread_stats: + self.per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + for key, value in stats.items(): + self.per_thread_stats[thread_id][key] += value + + # Call parent collect to process frames + super().collect(stack_frames, skip_idle=skip_idle) + + def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, mode=None): """Set profiling statistics to include in flamegraph data.""" self.stats = { "sample_interval_usec": sample_interval_usec, "duration_sec": duration_sec, "sample_rate": sample_rate, - "error_rate": error_rate + "error_rate": error_rate, + "mode": mode } def export(self, filename): @@ -117,7 +165,6 @@ class FlamegraphCollector(StackTraceCollector): return f"{funcname} ({filename}:{lineno})" def _convert_to_flamegraph_format(self): - 
"""Convert aggregated trie to d3-flamegraph format with string table optimization.""" if self._total_samples == 0: return { "name": self._string_table.intern("No Data"), @@ -178,6 +225,29 @@ class FlamegraphCollector(StackTraceCollector): "strings": self._string_table.get_strings() } + # Calculate thread status percentages for display + total_threads = max(1, self.thread_status_counts["total"]) + thread_stats = { + "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100, + "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100, + "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100, + "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100, + **self.thread_status_counts + } + + # Calculate per-thread statistics with percentages + per_thread_stats_with_pct = {} + total_samples_denominator = max(1, self._sample_count) + for thread_id, stats in self.per_thread_stats.items(): + total = max(1, stats["total"]) + per_thread_stats_with_pct[thread_id] = { + "has_gil_pct": (stats["has_gil"] / total) * 100, + "on_cpu_pct": (stats["on_cpu"] / total) * 100, + "gil_requested_pct": (stats["gil_requested"] / total) * 100, + "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100, + **stats + } + # If we only have one root child, make it the root to avoid redundant level if len(root_children) == 1: main_child = root_children[0] @@ -185,7 +255,11 @@ class FlamegraphCollector(StackTraceCollector): old_name = self._string_table.get_string(main_child["name"]) new_name = f"Program Root: {old_name}" main_child["name"] = self._string_table.intern(new_name) - main_child["stats"] = self.stats + main_child["stats"] = { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + } main_child["threads"] = sorted(list(self._all_threads)) main_child["strings"] = self._string_table.get_strings() return main_child @@ -194,7 +268,11 @@ class FlamegraphCollector(StackTraceCollector): "name": self._string_table.intern("Program Root"), "value": total_samples, "children": root_children, - "stats": self.stats, + "stats": { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + }, "threads": sorted(list(self._all_threads)), "strings": self._string_table.get_strings() } diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index a592f16b367c..38665f5a591e 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -14,6 +14,15 @@ try: FlamegraphCollector, ) from profiling.sampling.gecko_collector import GeckoCollector + from profiling.sampling.constants import ( + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + ) + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_GIL_REQUESTED, + ) except ImportError: raise unittest.SkipTest( "Test only runs when _remote_debugging is available" @@ -657,17 +666,6 @@ class TestSampleProfilerComponents(unittest.TestCase): def test_gecko_collector_markers(self): """Test Gecko profile markers for GIL and CPU state tracking.""" - try: - from _remote_debugging import ( - THREAD_STATUS_HAS_GIL, - THREAD_STATUS_ON_CPU, - THREAD_STATUS_GIL_REQUESTED, - ) - except ImportError: - THREAD_STATUS_HAS_GIL = 1 << 0 - THREAD_STATUS_ON_CPU = 1 << 1 - THREAD_STATUS_GIL_REQUESTED = 1 << 3 - collector = 
GeckoCollector(1000) # Status combinations for different thread states @@ -894,3 +892,312 @@ class TestSampleProfilerComponents(unittest.TestCase): self.assertEqual(func1_stats[1], 2) # nc (non-recursive calls) self.assertEqual(func1_stats[2], 2.0) # tt (total time) self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) + + def test_flamegraph_collector_stats_accumulation(self): + """Test that FlamegraphCollector accumulates stats across samples.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # First sample + stack_frames_1 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames_1) + self.assertEqual(collector.thread_status_counts["has_gil"], 1) + self.assertEqual(collector.thread_status_counts["on_cpu"], 1) + self.assertEqual(collector.thread_status_counts["total"], 2) + + # Second sample + stack_frames_2 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_GIL_REQUESTED), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(3, [("c.py", 3, "func_c")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames_2) + + # Should accumulate + self.assertEqual(collector.thread_status_counts["has_gil"], 2) # 1 + 1 + self.assertEqual(collector.thread_status_counts["on_cpu"], 2) # 1 + 1 + self.assertEqual(collector.thread_status_counts["gil_requested"], 1) # 0 + 1 + self.assertEqual(collector.thread_status_counts["total"], 5) # 2 + 3 + + # Test GC sample tracking + stack_frames_gc = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("~", 0, "")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames_gc) + self.assertEqual(collector.samples_with_gc_frames, 1) + + # Another sample without GC + collector.collect(stack_frames_1) + self.assertEqual(collector.samples_with_gc_frames, 1) # Still 1 + + # Another GC sample + collector.collect(stack_frames_gc) + self.assertEqual(collector.samples_with_gc_frames, 2) + + def test_flamegraph_collector_per_thread_stats(self): + """Test per-thread statistics tracking in FlamegraphCollector.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Multiple threads with different states + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(3, [("c.py", 3, "func_c")], status=THREAD_STATUS_GIL_REQUESTED), + ], + ) + ] + collector.collect(stack_frames) + + # Check per-thread stats + self.assertIn(1, collector.per_thread_stats) + self.assertIn(2, collector.per_thread_stats) + self.assertIn(3, collector.per_thread_stats) + + # Thread 1: has GIL + self.assertEqual(collector.per_thread_stats[1]["has_gil"], 1) + self.assertEqual(collector.per_thread_stats[1]["on_cpu"], 0) + self.assertEqual(collector.per_thread_stats[1]["total"], 1) + + # Thread 2: on CPU + self.assertEqual(collector.per_thread_stats[2]["has_gil"], 0) + self.assertEqual(collector.per_thread_stats[2]["on_cpu"], 1) + self.assertEqual(collector.per_thread_stats[2]["total"], 1) + + # Thread 3: waiting + self.assertEqual(collector.per_thread_stats[3]["gil_requested"], 1) + self.assertEqual(collector.per_thread_stats[3]["total"], 1) + + # Test accumulation across samples + stack_frames_2 = [ + 
MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames_2) + + self.assertEqual(collector.per_thread_stats[1]["has_gil"], 1) + self.assertEqual(collector.per_thread_stats[1]["on_cpu"], 1) + self.assertEqual(collector.per_thread_stats[1]["total"], 2) + + def test_flamegraph_collector_percentage_calculations(self): + """Test that percentage calculations are correct in exported data.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Create scenario: 60% GIL held, 40% not held + for i in range(6): + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + for i in range(4): + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames) + + # Export to get calculated percentages + data = collector._convert_to_flamegraph_format() + thread_stats = data["stats"]["thread_stats"] + + self.assertAlmostEqual(thread_stats["has_gil_pct"], 60.0, places=1) + self.assertAlmostEqual(thread_stats["on_cpu_pct"], 40.0, places=1) + self.assertEqual(thread_stats["total"], 10) + + def test_flamegraph_collector_mode_handling(self): + """Test that profiling mode is correctly passed through to exported data.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Collect some data + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + # Set stats with mode + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_CPU + ) + + data = collector._convert_to_flamegraph_format() + self.assertEqual(data["stats"]["mode"], PROFILING_MODE_CPU) + + def test_flamegraph_collector_zero_samples_edge_case(self): + """Test that collector handles zero samples gracefully.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Export without collecting any samples + data = collector._convert_to_flamegraph_format() + + # Should return a valid structure with no data + self.assertIn("name", data) + self.assertEqual(data["value"], 0) + self.assertIn("children", data) + self.assertEqual(len(data["children"]), 0) + + def test_flamegraph_collector_json_structure_includes_stats(self): + """Test that exported JSON includes thread_stats and per_thread_stats.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Collect some data with multiple threads + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames) + + # Set stats + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_WALL + ) + + # Export and verify structure + data = collector._convert_to_flamegraph_format() + + # Check that stats object exists and contains expected fields + self.assertIn("stats", data) + stats = data["stats"] + + # Verify thread_stats exists and has expected structure + self.assertIn("thread_stats", stats) + thread_stats = stats["thread_stats"] + self.assertIn("has_gil_pct", thread_stats) + self.assertIn("on_cpu_pct", thread_stats) + 
self.assertIn("gil_requested_pct", thread_stats) + self.assertIn("gc_pct", thread_stats) + self.assertIn("total", thread_stats) + + # Verify per_thread_stats exists and has data for both threads + self.assertIn("per_thread_stats", stats) + per_thread_stats = stats["per_thread_stats"] + self.assertIn(1, per_thread_stats) + self.assertIn(2, per_thread_stats) + + # Check per-thread structure + for thread_id in [1, 2]: + thread_data = per_thread_stats[thread_id] + self.assertIn("has_gil_pct", thread_data) + self.assertIn("on_cpu_pct", thread_data) + self.assertIn("gil_requested_pct", thread_data) + self.assertIn("gc_pct", thread_data) + self.assertIn("total", thread_data) + + def test_flamegraph_collector_per_thread_gc_percentage(self): + """Test that per-thread GC percentage uses total samples as denominator.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Create 10 samples total: + # - Thread 1 appears in all 10 samples, has GC in 2 of them + # - Thread 2 appears in only 5 samples, has GC in 1 of them + + # First 5 samples: both threads, thread 1 has GC in 2 + for i in range(5): + has_gc = i < 2 # First 2 samples have GC for thread 1 + frames_1 = [("~", 0, "")] if has_gc else [("a.py", 1, "func_a")] + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, frames_1, status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames) + + # Next 5 samples: only thread 1, thread 2 appears in first of these with GC + for i in range(5): + if i == 0: + # Thread 2 appears in this sample with GC + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("~", 0, "")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + else: + # Only thread 1 + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + # Set stats and export + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_WALL + ) + + data = collector._convert_to_flamegraph_format() + per_thread_stats = data["stats"]["per_thread_stats"] + + # Thread 1: appeared in 10 samples, had GC in 2 + # GC percentage should be 2/10 = 20% (using total samples, not thread appearances) + self.assertEqual(collector.per_thread_stats[1]["gc_samples"], 2) + self.assertEqual(collector.per_thread_stats[1]["total"], 10) + self.assertAlmostEqual(per_thread_stats[1]["gc_pct"], 20.0, places=1) + + # Thread 2: appeared in 6 samples, had GC in 1 + # GC percentage should be 1/10 = 10% (using total samples, not thread appearances) + self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1) + self.assertEqual(collector.per_thread_stats[2]["total"], 6) + self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1) diff --git a/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst b/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst new file mode 100644 index 000000000000..5742beeb85c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst @@ -0,0 +1,5 @@ +The ``profiling.sampling`` flamegraph profiler now displays thread status +statistics showing the percentage of time threads spend holding the GIL, +running without the GIL, waiting for the GIL, and performing garbage +collection. 
These statistics help identify GIL contention and thread behavior +patterns. When filtering by thread, the display shows per-thread metrics. -- 2.47.3
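
Reviewer note: the snippet below is a standalone sketch (not part of the patch) showing
how the new counters accumulate across collect() calls. FakeThread and FakeInterp are
hypothetical stand-ins for the test suite's MockThreadInfo/MockInterpreterInfo, and the
status constants reuse the fallback bit values the old Gecko test defined; on a real
checkout you would import them from _remote_debugging instead.

from dataclasses import dataclass

from profiling.sampling.stack_collector import FlamegraphCollector

# Assumed bit values, mirroring the fallbacks previously defined in the test;
# prefer importing these from _remote_debugging when available.
THREAD_STATUS_HAS_GIL = 1 << 0
THREAD_STATUS_ON_CPU = 1 << 1

@dataclass
class FakeThread:          # hypothetical stand-in for MockThreadInfo
    thread_id: int
    frame_info: list
    status: int

@dataclass
class FakeInterp:          # hypothetical stand-in for MockInterpreterInfo
    interpreter_id: int
    threads: list

collector = FlamegraphCollector(sample_interval_usec=1000)
for _ in range(3):         # three samples, two threads each
    collector.collect([FakeInterp(0, [
        FakeThread(1, [("app.py", 10, "work")], THREAD_STATUS_HAS_GIL),
        FakeThread(2, [("io.py", 42, "wait")], THREAD_STATUS_ON_CPU),
    ])])

print(collector.thread_status_counts)  # has_gil=3, on_cpu=3, total=6
print(collector.per_thread_stats[1])   # per-thread counters: has_gil=3, total=3
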