perf/x86/intel: Process arch-PEBS records or record fragments
Author:     Dapeng Mi <dapeng1.mi@linux.intel.com>
AuthorDate: Wed, 29 Oct 2025 10:21:32 +0000 (18:21 +0800)
Commit:     Peter Zijlstra <peterz@infradead.org>
CommitDate: Fri, 7 Nov 2025 14:08:21 +0000 (15:08 +0100)
A significant difference from adaptive PEBS is that an arch-PEBS record
supports fragments: a single record can be split into several independent
fragments, each of which carries its own arch-PEBS header.
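
For illustration only (not part of this patch): a minimal sketch of how a
consumer walks such a fragmented record, assuming the arch_pebs_header
layout added below; the names hdr and skip_record are hypothetical, and the
null-record case handled by arch_pebs_record_continued() is omitted:

	struct hdr {			/* low 32 header bits */
		u64 size:16,		/* fragment size in bytes */
		    rsvd:14,
		    mode:1,
		    cont:1,		/* another fragment follows */
		    rest:32;
	};

	static const void *skip_record(const void *at)
	{
		const struct hdr *h = at;

		while (h->cont) {	/* walk all non-last fragments */
			at += h->size;	/* each fragment is self-sized */
			h = at;
		}
		return at + h->size;	/* step past the final fragment */
	}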

This patch defines the architectural PEBS record layout structures and adds
helpers to process arch-PEBS records and fragments. Only the legacy PEBS
groups (basic, GPR, XMM and LBR) are supported by this patch; capturing the
newly added YMM/ZMM/OPMASK vector registers will be supported in the future.
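
For illustration only: a hypothetical helper (not in this patch) showing how
the header's presence bits gate parsing; group data follows the header in a
fixed order (basic, aux/meminfo, GPRs, XMM, LBR), so a parser advances a
cursor past each group the header advertises:

	static void list_groups(const struct arch_pebs_header *h)
	{
		if (h->basic)
			pr_debug("basic group (IP, TSC, retire latency)\n");
		if (h->aux)
			pr_debug("aux group (memory access info)\n");
		if (h->gpr)
			pr_debug("GPR group\n");
		if (h->xmm)
			pr_debug("XMM group, after an XER sub-header\n");
		if (h->lbr)
			pr_debug("LBR group, %s depth\n",
				 h->lbr == ARCH_PEBS_LBR_NUM_VAR ?
				 "variable" : "fixed");
	}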

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-9-dapeng1.mi@linux.intel.com
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/perf_event.h

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9ce27b3269239802d7bbfdfec381acfa817d62c9..de4dbde28adcd01f1ab4976d95bb5ef0a0557a49 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3215,6 +3215,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
                        status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
        }
 
+       /*
+        * Arch PEBS sets bit 54 in the global status register
+        */
+       if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
+                                (unsigned long *)&status)) {
+               handled++;
+               static_call(x86_pmu_drain_pebs)(regs, &data);
+
+               if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
+                   is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
+                       status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
+       }
+
        /*
         * Intel PT
         */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 68664526443fd0e6b967ef04bb8c1bed2bdce32d..fe1bf373409e2876d18127c584d6ceda30f82dcb 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2270,6 +2270,117 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
                        format_group);
 }
 
+static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
+{
+       /* The continue bit or a null PEBS record indicates a fragment follows. */
+       return header->cont || !(header->format & GENMASK_ULL(63, 16));
+}
+
+static void setup_arch_pebs_sample_data(struct perf_event *event,
+                                       struct pt_regs *iregs,
+                                       void *__pebs,
+                                       struct perf_sample_data *data,
+                                       struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u64 sample_type = event->attr.sample_type;
+       struct arch_pebs_header *header = NULL;
+       struct arch_pebs_aux *meminfo = NULL;
+       struct arch_pebs_gprs *gprs = NULL;
+       struct x86_perf_regs *perf_regs;
+       void *next_record;
+       void *at = __pebs;
+
+       if (at == NULL)
+               return;
+
+       perf_regs = container_of(regs, struct x86_perf_regs, regs);
+       perf_regs->xmm_regs = NULL;
+
+       __setup_perf_sample_data(event, iregs, data);
+
+       *regs = *iregs;
+
+again:
+       header = at;
+       next_record = at + sizeof(struct arch_pebs_header);
+       if (header->basic) {
+               struct arch_pebs_basic *basic = next_record;
+               u16 retire = 0;
+
+               next_record = basic + 1;
+
+               if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+                       retire = basic->valid ? basic->retire : 0;
+               __setup_pebs_basic_group(event, regs, data, sample_type,
+                                basic->ip, basic->tsc, retire);
+       }
+
+       /*
+        * The MEMINFO group precedes the GPR group in the record,
+        * but PERF_SAMPLE_TRANSACTION needs gprs->ax. Save the
+        * pointer here and process the group later.
+        */
+       if (header->aux) {
+               meminfo = next_record;
+               next_record = meminfo + 1;
+       }
+
+       if (header->gpr) {
+               gprs = next_record;
+               next_record = gprs + 1;
+
+               __setup_pebs_gpr_group(event, regs,
+                                      (struct pebs_gprs *)gprs,
+                                      sample_type);
+       }
+
+       if (header->aux) {
+               u64 ax = gprs ? gprs->ax : 0;
+
+               __setup_pebs_meminfo_group(event, data, sample_type,
+                                          meminfo->cache_latency,
+                                          meminfo->instr_latency,
+                                          meminfo->address, meminfo->aux,
+                                          meminfo->tsx_tuning, ax);
+       }
+
+       if (header->xmm) {
+               struct pebs_xmm *xmm;
+
+               next_record += sizeof(struct arch_pebs_xer_header);
+
+               xmm = next_record;
+               perf_regs->xmm_regs = xmm->xmm;
+               next_record = xmm + 1;
+       }
+
+       if (header->lbr) {
+               struct arch_pebs_lbr_header *lbr_header = next_record;
+               struct lbr_entry *lbr;
+               int num_lbr;
+
+               next_record = lbr_header + 1;
+               lbr = next_record;
+
+               num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
+                               lbr_header->depth :
+                               header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
+               next_record += num_lbr * sizeof(struct lbr_entry);
+
+               if (has_branch_stack(event)) {
+                       intel_pmu_store_pebs_lbrs(lbr);
+                       intel_pmu_lbr_save_brstack(data, cpuc, event);
+               }
+       }
+
+       /* Parse any following fragments. */
+       if (arch_pebs_record_continued(header)) {
+               at = at + header->size;
+               goto again;
+       }
+}
+
 static inline void *
 get_next_pebs_record_by_bit(void *base, void *top, int bit)
 {
@@ -2753,6 +2864,78 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
                                            setup_pebs_adaptive_sample_data);
 }
 
+static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
+                                     struct perf_sample_data *data)
+{
+       short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+       void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       union arch_pebs_index index;
+       struct x86_perf_regs perf_regs;
+       struct pt_regs *regs = &perf_regs.regs;
+       void *base, *at, *top;
+       u64 mask;
+
+       rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+       if (unlikely(!index.wr)) {
+               intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+               return;
+       }
+
+       base = cpuc->ds_pebs_vaddr;
+       top = (void *)((u64)cpuc->ds_pebs_vaddr +
+                      (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));
+
+       index.wr = 0;
+       index.full = 0;
+       wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+       mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
+
+       if (!iregs)
+               iregs = &dummy_iregs;
+
+       /* Process all but the last event for each counter. */
+       for (at = base; at < top;) {
+               struct arch_pebs_header *header;
+               struct arch_pebs_basic *basic;
+               u64 pebs_status;
+
+               header = at;
+
+               if (WARN_ON_ONCE(!header->size))
+                       break;
+
+               /* 1st fragment or single record must have basic group */
+               if (!header->basic) {
+                       at += header->size;
+                       continue;
+               }
+
+               basic = at + sizeof(struct arch_pebs_header);
+               pebs_status = mask & basic->applicable_counters;
+               __intel_pmu_handle_pebs_record(iregs, regs, data, at,
+                                              pebs_status, counts, last,
+                                              setup_arch_pebs_sample_data);
+
+               /* Skip non-last fragments */
+               while (arch_pebs_record_continued(header)) {
+                       if (!header->size)
+                               break;
+                       at += header->size;
+                       header = at;
+               }
+
+               /* Skip last fragment or the single record */
+               at += header->size;
+       }
+
+       __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
+                                           counts, last,
+                                           setup_arch_pebs_sample_data);
+}
+
 static void __init intel_arch_pebs_init(void)
 {
        /*
@@ -2762,6 +2945,7 @@ static void __init intel_arch_pebs_init(void)
         */
        x86_pmu.arch_pebs = 1;
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+       x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
        x86_pmu.pebs_capable = ~0ULL;
 
        x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9e1720d73244f6860249509ae6040841645b1376..fc7a4e7c718d36e7969728d66a22c76dc0423a91 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
                                         PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
                                         PERF_CAP_PEBS_TIMING_INFO)
 
+/* Arch PEBS */
+#define MSR_IA32_PEBS_BASE             0x000003f4
+#define MSR_IA32_PEBS_INDEX            0x000003f5
+#define ARCH_PEBS_OFFSET_MASK          0x7fffff
+#define ARCH_PEBS_INDEX_WR_SHIFT       4
+
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define RTIT_CTL_TRACEEN               BIT(0)
 #define RTIT_CTL_CYCLEACC              BIT(1)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 0dfa06722bab77e9e47ad961a59442d5205fb532..3b3848f0d3393e59981dfd492b5f452fc7b18e68 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -437,6 +437,8 @@ static inline bool is_topdown_idx(int idx)
 #define GLOBAL_STATUS_LBRS_FROZEN              BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
 #define GLOBAL_STATUS_TRACE_TOPAPMI_BIT                55
 #define GLOBAL_STATUS_TRACE_TOPAPMI            BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT  54
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD      BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
 #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT     48
 
 #define GLOBAL_CTRL_EN_PERF_METRICS            BIT_ULL(48)
@@ -507,6 +509,100 @@ struct pebs_cntr_header {
 
 #define INTEL_CNTR_METRICS             0x3
 
+/*
+ * Arch PEBS
+ */
+union arch_pebs_index {
+       struct {
+               u64 rsvd:4,
+                   wr:23,
+                   rsvd2:4,
+                   full:1,
+                   en:1,
+                   rsvd3:3,
+                   thresh:23,
+                   rsvd4:5;
+       };
+       u64 whole;
+};
+
+struct arch_pebs_header {
+       union {
+               u64 format;
+               struct {
+                       u64 size:16,    /* Record size */
+                           rsvd:14,
+                           mode:1,     /* 64BIT_MODE */
+                           cont:1,
+                           rsvd2:3,
+                           cntr:5,
+                           lbr:2,
+                           rsvd3:7,
+                           xmm:1,
+                           ymmh:1,
+                           rsvd4:2,
+                           opmask:1,
+                           zmmh:1,
+                           h16zmm:1,
+                           rsvd5:5,
+                           gpr:1,
+                           aux:1,
+                           basic:1;
+               };
+       };
+       u64 rsvd6;
+};
+
+struct arch_pebs_basic {
+       u64 ip;
+       u64 applicable_counters;
+       u64 tsc;
+       u64 retire      :16,    /* Retire Latency */
+           valid       :1,
+           rsvd        :47;
+       u64 rsvd2;
+       u64 rsvd3;
+};
+
+struct arch_pebs_aux {
+       u64 address;
+       u64 rsvd;
+       u64 rsvd2;
+       u64 rsvd3;
+       u64 rsvd4;
+       u64 aux;
+       u64 instr_latency       :16,
+           pad2                :16,
+           cache_latency       :16,
+           pad3                :16;
+       u64 tsx_tuning;
+};
+
+struct arch_pebs_gprs {
+       u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+       u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
+       u64 rsvd;
+};
+
+struct arch_pebs_xer_header {
+       u64 xstate;
+       u64 rsvd;
+};
+
+#define ARCH_PEBS_LBR_NAN              0x0
+#define ARCH_PEBS_LBR_NUM_8            0x1
+#define ARCH_PEBS_LBR_NUM_16           0x2
+#define ARCH_PEBS_LBR_NUM_VAR          0x3
+#define ARCH_PEBS_BASE_LBR_ENTRIES     8
+struct arch_pebs_lbr_header {
+       u64 rsvd;
+       u64 ctl;
+       u64 depth;
+       u64 ler_from;
+       u64 ler_to;
+       u64 ler_info;
+};
+
 /*
  * AMD Extended Performance Monitoring and Debug cpuid feature detection
  */