perf/x86/intel: Add counter group support for arch-PEBS
author     Dapeng Mi <dapeng1.mi@linux.intel.com>
Wed, 29 Oct 2025 10:21:36 +0000 (18:21 +0800)
committer  Peter Zijlstra <peterz@infradead.org>
Fri, 7 Nov 2025 14:08:22 +0000 (15:08 +0100)
Based on the previous adaptive PEBS counter snapshot support, add counter
group support for architectural PEBS. Since arch-PEBS shares the same
counter group layout with adaptive PEBS, directly reuse the
__setup_pebs_counter_group() helper to process arch-PEBS counter groups.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-13-dapeng1.mi@linux.intel.com
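
[For context, a minimal userspace sketch of the kind of event group this
patch targets: a precise (PEBS) sampling leader that reads its whole group
from the PEBS record via PERF_SAMPLE_READ, roughly what
`perf record -e '{cycles,instructions}:S'` arms. Illustrative only, not
part of the patch; error handling elided.]

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int perf_open(struct perf_event_attr *attr, int group_fd)
{
	return syscall(SYS_perf_event_open, attr, 0, -1, group_fd, 0);
}

int main(void)
{
	struct perf_event_attr leader, sibling;
	int lfd, sfd;

	memset(&leader, 0, sizeof(leader));
	leader.size          = sizeof(leader);
	leader.type          = PERF_TYPE_HARDWARE;
	leader.config        = PERF_COUNT_HW_CPU_CYCLES;
	leader.sample_period = 100000;
	leader.precise_ip    = 2;		/* request PEBS */
	leader.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
	leader.read_format   = PERF_FORMAT_GROUP;

	memset(&sibling, 0, sizeof(sibling));
	sibling.size   = sizeof(sibling);
	sibling.type   = PERF_TYPE_HARDWARE;
	sibling.config = PERF_COUNT_HW_INSTRUCTIONS;

	lfd = perf_open(&leader, -1);		/* group leader */
	sfd = perf_open(&sibling, lfd);		/* sibling, read via PEBS */

	/* ... mmap the ring buffer, enable the group, consume samples ... */
	(void)sfd;
	return 0;
}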
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/perf_event.h

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 75cba28b86d51de34654aedc1712c297a3dd1a65..cb64018321ddcd16863c6dd8b13d7cc0c2a4ebf0 100644
@@ -3014,6 +3014,17 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
 
                        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
                                ext |= ARCH_PEBS_LBR & cap.caps;
+
+                       if (pebs_data_cfg &
+                           (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
+                               ext |= ARCH_PEBS_CNTR_GP & cap.caps;
+
+                       if (pebs_data_cfg &
+                           (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
+                               ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
+
+                       if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+                               ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
                }
 
                if (cpuc->n_pebs == cpuc->n_large_pebs)
@@ -3038,6 +3049,9 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
                }
        }
 
+       if (is_pebs_counter_event_group(event))
+               ext |= ARCH_PEBS_CNTR_ALLOW;
+
        if (cpuc->cfg_c_val[hwc->idx] != ext)
                __intel_pmu_update_event_ext(hwc->idx, ext);
 }
@@ -4323,6 +4337,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
        return false;
 }
 
+static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
+{
+       u64 caps;
+
+       if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
+               return true;
+
+       caps = hybrid(pmu, arch_pebs_cap).caps;
+       if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
+               return true;
+
+       return false;
+}
+
 static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
                                                 u64 *cause_mask, int *num)
 {
@@ -4471,8 +4499,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
        }
 
        if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
-           (x86_pmu.intel_cap.pebs_format >= 6) &&
-           x86_pmu.intel_cap.pebs_baseline &&
+           intel_pmu_has_pebs_counter_group(event->pmu) &&
            is_sampling_event(event) &&
            event->attr.precise_ip)
                event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
@@ -5420,6 +5447,8 @@ static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
        if (caps & ARCH_PEBS_LBR)
                x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+       if (caps & ARCH_PEBS_CNTR_MASK)
+               x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
 
        if (!(caps & ARCH_PEBS_AUX))
                x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
@@ -7134,8 +7163,11 @@ __init int intel_pmu_init(void)
         * Many features on and after V6 require dynamic constraint,
         * e.g., Arch PEBS, ACR.
         */
-       if (version >= 6)
+       if (version >= 6) {
                x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+               x86_pmu.late_setup = intel_pmu_late_setup;
+       }
+
        /*
         * Install the hw-cache-events table:
         */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c66e9b562de39ab12681c5ea284cf50d3d0b2afc..c93bf971d97b9e938ac6bf3223563fc5361d48d3 100644
@@ -1530,13 +1530,20 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
 
 u64 intel_get_arch_pebs_data_config(struct perf_event *event)
 {
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 pebs_data_cfg = 0;
+       u64 cntr_mask;
 
        if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
                return 0;
 
        pebs_data_cfg |= pebs_update_adaptive_cfg(event);
 
+       cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
+                   (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
+                   PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
+       pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
+
        return pebs_data_cfg;
 }
 
@@ -2444,6 +2451,24 @@ again:
                }
        }
 
+       if (header->cntr) {
+               struct arch_pebs_cntr_header *cntr = next_record;
+               unsigned int nr;
+
+               next_record += sizeof(struct arch_pebs_cntr_header);
+
+               if (is_pebs_counter_event_group(event)) {
+                       __setup_pebs_counter_group(cpuc, event,
+                               (struct pebs_cntr_header *)cntr, next_record);
+                       data->sample_flags |= PERF_SAMPLE_READ;
+               }
+
+               nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
+               if (cntr->metrics == INTEL_CNTR_METRICS)
+                       nr += 2;
+               next_record += nr * sizeof(u64);
+       }
+
        /* Parse followed fragments if there are. */
        if (arch_pebs_record_continued(header)) {
                at = at + header->size;
@@ -3094,10 +3119,8 @@ static void __init intel_ds_pebs_init(void)
                        break;
 
                case 6:
-                       if (x86_pmu.intel_cap.pebs_baseline) {
+                       if (x86_pmu.intel_cap.pebs_baseline)
                                x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
-                               x86_pmu.late_setup = intel_pmu_late_setup;
-                       }
                        fallthrough;
                case 5:
                        x86_pmu.pebs_ept = 1;
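
[For reference, the counter group fragment consumed in the parsing hunk
above has the shape sketched below. The helper is not part of the patch;
its name is illustrative, and the sizing is inferred from the hunk:
the header's bitmaps say how many GP/fixed counter values follow, plus
two extra words when topdown metrics are included. hweight32(),
INTEL_CNTR_METRICS and struct arch_pebs_cntr_header are the kernel
identifiers used by the patch itself.]

/*
 * Sketch: size of one arch-PEBS counter group fragment as consumed by
 * the parsing code above -- a header followed by nr 64-bit values.
 */
static inline unsigned int arch_pebs_cntr_fragment_size(
		const struct arch_pebs_cntr_header *hdr)
{
	unsigned int nr = hweight32(hdr->cntr) + hweight32(hdr->fixed);

	if (hdr->metrics == INTEL_CNTR_METRICS)
		nr += 2;		/* two metrics words */

	return sizeof(*hdr) + nr * sizeof(u64);
}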
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f1ef9ac38bfb9515420d1a3b07fccb6c3f556833..65cc528fbad852f6332eb48908d96e23db07f280 100644
 #define ARCH_PEBS_INDEX_WR_SHIFT       4
 
 #define ARCH_PEBS_RELOAD               0xffffffff
+#define ARCH_PEBS_CNTR_ALLOW           BIT_ULL(35)
+#define ARCH_PEBS_CNTR_GP              BIT_ULL(36)
+#define ARCH_PEBS_CNTR_FIXED           BIT_ULL(37)
+#define ARCH_PEBS_CNTR_METRICS         BIT_ULL(38)
 #define ARCH_PEBS_LBR_SHIFT            40
 #define ARCH_PEBS_LBR                  (0x3ull << ARCH_PEBS_LBR_SHIFT)
 #define ARCH_PEBS_VECR_XMM             BIT_ULL(49)
 #define ARCH_PEBS_GPR                  BIT_ULL(61)
 #define ARCH_PEBS_AUX                  BIT_ULL(62)
 #define ARCH_PEBS_EN                   BIT_ULL(63)
+#define ARCH_PEBS_CNTR_MASK            (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
+                                        ARCH_PEBS_CNTR_METRICS)
 
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define RTIT_CTL_TRACEEN               BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 3b3848f0d3393e59981dfd492b5f452fc7b18e68..7276ba70c88a9329330eaa71eb47c60678c4a76b 100644
 #define ARCH_PERFMON_EVENTS_COUNT                      7
 
 #define PEBS_DATACFG_MEMINFO   BIT_ULL(0)
-#define PEBS_DATACFG_GP        BIT_ULL(1)
+#define PEBS_DATACFG_GP                BIT_ULL(1)
 #define PEBS_DATACFG_XMMS      BIT_ULL(2)
 #define PEBS_DATACFG_LBRS      BIT_ULL(3)
-#define PEBS_DATACFG_LBR_SHIFT 24
 #define PEBS_DATACFG_CNTR      BIT_ULL(4)
+#define PEBS_DATACFG_METRICS   BIT_ULL(5)
+#define PEBS_DATACFG_LBR_SHIFT 24
 #define PEBS_DATACFG_CNTR_SHIFT        32
 #define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
 #define PEBS_DATACFG_FIX_SHIFT 48
 #define PEBS_DATACFG_FIX_MASK  GENMASK_ULL(7, 0)
-#define PEBS_DATACFG_METRICS   BIT_ULL(5)
 
 /* Steal the highest bit of pebs_data_cfg for SW usage */
 #define PEBS_UPDATE_DS_SW      BIT_ULL(63)
@@ -603,6 +603,13 @@ struct arch_pebs_lbr_header {
        u64 ler_info;
 };
 
+struct arch_pebs_cntr_header {
+       u32 cntr;
+       u32 fixed;
+       u32 metrics;
+       u32 reserved;
+};
+
 /*
  * AMD Extended Performance Monitoring and Debug cpuid feature detection
  */
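
[As a footnote on the PEBS_DATACFG fields above: the GP and fixed counter
bitmaps occupy bits 32-47 and 48-55 of pebs_data_cfg, alongside the
CNTR/METRICS enable bits. A hedged sketch of composing such a value from
the defines in this hunk; the helper name is illustrative and the
assumption that PEBS_DATACFG_CNTR gates fixed-counter inclusion as well
follows the combined cntr_mask used in intel_get_arch_pebs_data_config().]

/*
 * Sketch (not part of the patch): request a counter snapshot of the
 * given GP/fixed counter bitmaps, optionally with topdown metrics.
 */
static inline u64 pebs_cntr_data_cfg(u16 gp_bitmap, u8 fix_bitmap,
				     bool metrics)
{
	u64 cfg = 0;

	if (gp_bitmap)
		cfg |= PEBS_DATACFG_CNTR |
		       ((u64)(gp_bitmap & PEBS_DATACFG_CNTR_MASK) <<
				PEBS_DATACFG_CNTR_SHIFT);

	if (fix_bitmap)
		cfg |= PEBS_DATACFG_CNTR |
		       ((u64)(fix_bitmap & PEBS_DATACFG_FIX_MASK) <<
				PEBS_DATACFG_FIX_SHIFT);

	if (metrics)
		cfg |= PEBS_DATACFG_METRICS;

	return cfg;
}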