perf/x86/intel: Setup PEBS data configuration and enable legacy groups
author      Dapeng Mi <dapeng1.mi@linux.intel.com>
            Wed, 29 Oct 2025 10:21:35 +0000 (18:21 +0800)
committer   Peter Zijlstra <peterz@infradead.org>
            Fri, 7 Nov 2025 14:08:22 +0000 (15:08 +0100)
Unlike legacy PEBS, arch-PEBS provides per-counter PEBS data
configuration by programming the IA32_PMC_GPx/FXx_CFG_C MSRs.

This patch obtains the PEBS data configuration from the event attributes,
writes it to the IA32_PMC_GPx/FXx_CFG_C MSRs and enables the corresponding
PEBS groups.

Please note this patch only enables XMM SIMD register sampling for
arch-PEBS; sampling of the other SIMD registers (OPMASK/YMM/ZMM) on
arch-PEBS will be supported once PMI-based OPMASK/YMM/ZMM sampling is
supported.
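
For illustration only (not part of this patch), a minimal userspace sketch
of how XMM register sampling via arch-PEBS would be requested: a PEBS event
(precise_ip > 0) whose sample_regs_intr includes the XMM registers, which
pebs_update_adaptive_cfg() turns into PEBS_DATACFG_XMMS and, with this
patch, into ARCH_PEBS_VECR_XMM. It assumes PERF_REG_EXTENDED_MASK is
available from the x86 uapi <asm/perf_regs.h> header.

  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>
  #include <asm/perf_regs.h>      /* PERF_REG_EXTENDED_MASK: XMM0-XMM15 */

  /* Open a PEBS cycles event that samples XMM registers in each record. */
  static int open_pebs_xmm_event(void)
  {
          struct perf_event_attr attr = {
                  .type             = PERF_TYPE_HARDWARE,
                  .size             = sizeof(attr),
                  .config           = PERF_COUNT_HW_CPU_CYCLES,
                  .sample_period    = 100003,
                  .sample_type      = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR,
                  .precise_ip       = 2,          /* request PEBS */
                  .sample_regs_intr = PERF_REG_EXTENDED_MASK,
                  .exclude_kernel   = 1,
          };

          /* Monitor the calling thread on any CPU. */
          return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  }

This requires the PMU to advertise PERF_PMU_CAP_EXTENDED_REGS, which this
patch sets when the arch-PEBS capabilities include ARCH_PEBS_VECR_XMM.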

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-12-dapeng1.mi@linux.intel.com
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/intel_ds.h
arch/x86/include/asm/msr-index.h

index 40ccfd80d55447be735e76e92f0fecbbd00ac468..75cba28b86d51de34654aedc1712c297a3dd1a65 100644
@@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
        cpuc->fixed_ctrl_val &= ~mask;
 }
 
+static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u32 msr;
+
+       if (idx < INTEL_PMC_IDX_FIXED) {
+               msr = MSR_IA32_PMC_V6_GP0_CFG_C +
+                     x86_pmu.addr_offset(idx, false);
+       } else {
+               msr = MSR_IA32_PMC_V6_FX0_CFG_C +
+                     x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+       }
+
+       cpuc->cfg_c_val[idx] = ext;
+       wrmsrq(msr, ext);
+}
+
+static void intel_pmu_disable_event_ext(struct perf_event *event)
+{
+       if (!x86_pmu.arch_pebs)
+               return;
+
+       /*
+        * Only clear the CFG_C MSR for PEBS counter group events;
+        * this avoids the HW counter's value being added into
+        * other PEBS records incorrectly after the PEBS counter
+        * group events are disabled.
+        *
+        * For other events, it's unnecessary to clear the CFG_C
+        * MSRs since CFG_C has no effect while the counter is
+        * disabled. That helps to reduce the WRMSR overhead
+        * on context switches.
+        */
+       if (!is_pebs_counter_event_group(event))
+               return;
+
+       __intel_pmu_update_event_ext(event->hw.idx, 0);
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
        switch (idx) {
        case 0 ... INTEL_PMC_IDX_FIXED - 1:
                intel_clear_masks(event, idx);
+               intel_pmu_disable_event_ext(event);
                x86_pmu_disable_event(event);
                break;
        case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+               intel_pmu_disable_event_ext(event);
+               fallthrough;
        case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
                intel_pmu_disable_fixed(event);
                break;
@@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
 
 DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
 
+static void intel_pmu_enable_event_ext(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       union arch_pebs_index old, new;
+       struct arch_pebs_cap cap;
+       u64 ext = 0;
+
+       if (!x86_pmu.arch_pebs)
+               return;
+
+       cap = hybrid(cpuc->pmu, arch_pebs_cap);
+
+       if (event->attr.precise_ip) {
+               u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
+
+               ext |= ARCH_PEBS_EN;
+               if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
+                       ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
+
+               if (pebs_data_cfg && cap.caps) {
+                       if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+                               ext |= ARCH_PEBS_AUX & cap.caps;
+
+                       if (pebs_data_cfg & PEBS_DATACFG_GP)
+                               ext |= ARCH_PEBS_GPR & cap.caps;
+
+                       if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+                               ext |= ARCH_PEBS_VECR_XMM & cap.caps;
+
+                       if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+                               ext |= ARCH_PEBS_LBR & cap.caps;
+               }
+
+               if (cpuc->n_pebs == cpuc->n_large_pebs)
+                       new.thresh = ARCH_PEBS_THRESH_MULTI;
+               else
+                       new.thresh = ARCH_PEBS_THRESH_SINGLE;
+
+               rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
+               if (new.thresh != old.thresh || !old.en) {
+                       if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
+                               /*
+                                * Large PEBS was enabled.
+                                * Drain PEBS buffer before applying the single PEBS.
+                                */
+                               intel_pmu_drain_pebs_buffer();
+                       } else {
+                               new.wr = 0;
+                               new.full = 0;
+                               new.en = 1;
+                               wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
+                       }
+               }
+       }
+
+       if (cpuc->cfg_c_val[hwc->idx] != ext)
+               __intel_pmu_update_event_ext(hwc->idx, ext);
+}
+
 static void intel_pmu_enable_event(struct perf_event *event)
 {
        u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
                        enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
                intel_set_masks(event, idx);
                static_call_cond(intel_pmu_enable_acr_event)(event);
+               intel_pmu_enable_event_ext(event);
                __x86_pmu_enable_event(hwc, enable_mask);
                break;
        case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
                static_call_cond(intel_pmu_enable_acr_event)(event);
+               intel_pmu_enable_event_ext(event);
                fallthrough;
        case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
                intel_pmu_enable_fixed(event);
@@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
        return false;
 }
 
+static inline void __intel_update_pmu_caps(struct pmu *pmu)
+{
+       struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
+
+       if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
+               dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+}
+
+static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
+{
+       u64 caps = hybrid(pmu, arch_pebs_cap).caps;
+
+       x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
+       if (caps & ARCH_PEBS_LBR)
+               x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+
+       if (!(caps & ARCH_PEBS_AUX))
+               x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
+       if (!(caps & ARCH_PEBS_GPR)) {
+               x86_pmu.large_pebs_flags &=
+                       ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
+       }
+}
+
 #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
 
 static void update_pmu_cap(struct pmu *pmu)
@@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
                hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
                hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
 
-               if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+               if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
                        x86_pmu.arch_pebs = 0;
+               } else {
+                       __intel_update_pmu_caps(pmu);
+                       __intel_update_large_pebs_flags(pmu);
+               }
        } else {
                WARN_ON(x86_pmu.arch_pebs == 1);
                x86_pmu.arch_pebs = 0;
@@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
                }
        }
 
+       __intel_update_pmu_caps(cpuc->pmu);
+
        if (!cpuc->shared_regs)
                return;
 
index 1179980f795b97aedd22bb23c7e945e3563f4549..c66e9b562de39ab12681c5ea284cf50d3d0b2afc 100644
@@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
        }
 }
 
+u64 intel_get_arch_pebs_data_config(struct perf_event *event)
+{
+       u64 pebs_data_cfg = 0;
+
+       if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
+               return 0;
+
+       pebs_data_cfg |= pebs_update_adaptive_cfg(event);
+
+       return pebs_data_cfg;
+}
+
 void intel_pmu_pebs_add(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
 
        index.wr = 0;
        index.full = 0;
+       index.en = 1;
+       if (cpuc->n_pebs == cpuc->n_large_pebs)
+               index.thresh = ARCH_PEBS_THRESH_MULTI;
+       else
+               index.thresh = ARCH_PEBS_THRESH_SINGLE;
        wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
 
        mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
index 13f411bca6bc55fb44561fed7972a8b71e2b52af..3161ec0a3416d265336405b4d08a18efbbea1cab 100644
@@ -304,6 +304,8 @@ struct cpu_hw_events {
        /* Intel ACR configuration */
        u64                     acr_cfg_b[X86_PMC_IDX_MAX];
        u64                     acr_cfg_c[X86_PMC_IDX_MAX];
+       /* Cached CFG_C values */
+       u64                     cfg_c_val[X86_PMC_IDX_MAX];
 
        /*
         * Intel LBR bits
@@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
 
 void intel_pmu_pebs_data_source_lnl(void);
 
+u64 intel_get_arch_pebs_data_config(struct perf_event *event);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
index 023c2883f9f3e9ebc4b9fd9a539d95575bb4d026..695f87efbeb8921056657c8420f8f5fe47871b7b 100644
@@ -7,6 +7,13 @@
 #define PEBS_BUFFER_SHIFT      4
 #define PEBS_BUFFER_SIZE       (PAGE_SIZE << PEBS_BUFFER_SHIFT)
 
+/*
+ * The largest PEBS record can consume a page, so ensure that
+ * at least one record can still be written after the PMI triggers.
+ */
+#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE        1
+
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4   8
 #define MAX_PEBS_EVENTS                32
index fc7a4e7c718d36e7969728d66a22c76dc0423a91..f1ef9ac38bfb9515420d1a3b07fccb6c3f556833 100644
 #define ARCH_PEBS_OFFSET_MASK          0x7fffff
 #define ARCH_PEBS_INDEX_WR_SHIFT       4
 
+#define ARCH_PEBS_RELOAD               0xffffffff
+#define ARCH_PEBS_LBR_SHIFT            40
+#define ARCH_PEBS_LBR                  (0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM             BIT_ULL(49)
+#define ARCH_PEBS_GPR                  BIT_ULL(61)
+#define ARCH_PEBS_AUX                  BIT_ULL(62)
+#define ARCH_PEBS_EN                   BIT_ULL(63)
+
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define RTIT_CTL_TRACEEN               BIT(0)
 #define RTIT_CTL_CYCLEACC              BIT(1)