perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR
author     Dapeng Mi <dapeng1.mi@linux.intel.com>
           Wed, 29 Oct 2025 10:21:33 +0000 (18:21 +0800)
committer  Peter Zijlstra <peterz@infradead.org>
           Fri, 7 Nov 2025 14:08:22 +0000 (15:08 +0100)
Arch-PEBS introduces a new MSR, IA32_PEBS_BASE, which stores the physical
address of the arch-PEBS buffer. This patch allocates the arch-PEBS buffer
and then initializes the IA32_PEBS_BASE MSR with the buffer's physical
address.
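
For illustration only, a minimal userspace sketch of how the MSR value is
composed, assuming 4KB pages and the PEBS_BUFFER_SHIFT encoding used in the
intel_ds.h hunk below; the buffer physical address here is hypothetical, and
the actual kernel code is in the ds.c hunk (init_arch_pebs_on_cpu()):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE          4096ULL
    #define PEBS_BUFFER_SHIFT  4ULL   /* buffer size = 4KB * 2^4 = 64KB */

    int main(void)
    {
            /* Hypothetical page-aligned physical address of the buffer. */
            uint64_t buf_phys = 0x12340000ULL;

            /*
             * The low bits of IA32_PEBS_BASE carry the size exponent;
             * the address is 4KB aligned, so the two fields do not overlap.
             */
            uint64_t pebs_base = buf_phys | PEBS_BUFFER_SHIFT;

            /* wrmsr takes the value as two 32-bit halves (lo, hi). */
            uint32_t lo = (uint32_t)pebs_base;
            uint32_t hi = (uint32_t)(pebs_base >> 32);

            printf("IA32_PEBS_BASE = %#llx (lo=%#x hi=%#x, size=%llu KB)\n",
                   (unsigned long long)pebs_base, lo, hi,
                   (unsigned long long)((PAGE_SIZE << PEBS_BUFFER_SHIFT) / 1024));
            return 0;
    }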

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-10-dapeng1.mi@linux.intel.com
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/intel_ds.h

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index de4dbde28adcd01f1ab4976d95bb5ef0a0557a49..6e04d73dfae50565f25c3c5c1f152d112dff2675 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5227,7 +5227,13 @@ err:
 
 static int intel_pmu_cpu_prepare(int cpu)
 {
-       return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+       int ret;
+
+       ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+       if (ret)
+               return ret;
+
+       return alloc_arch_pebs_buf_on_cpu(cpu);
 }
 
 static void flip_smm_bit(void *data)
@@ -5458,6 +5464,7 @@ static void intel_pmu_cpu_starting(int cpu)
                return;
 
        init_debug_store_on_cpu(cpu);
+       init_arch_pebs_on_cpu(cpu);
        /*
         * Deal with CPUs that don't clear their LBRs on power-up, and that may
         * even boot with LBRs enabled.
@@ -5555,6 +5562,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
 static void intel_pmu_cpu_dying(int cpu)
 {
        fini_debug_store_on_cpu(cpu);
+       fini_arch_pebs_on_cpu(cpu);
 }
 
 void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@@ -5575,6 +5583,7 @@ static void intel_pmu_cpu_dead(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
+       release_arch_pebs_buf_on_cpu(cpu);
        intel_cpuc_finish(cpuc);
 
        if (is_hybrid() && cpuc->pmu)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index fe1bf373409e2876d18127c584d6ceda30f82dcb..5c26a5235f94c1ed44eee16a355c3d173f6dc078 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -625,13 +625,18 @@ static int alloc_pebs_buffer(int cpu)
        int max, node = cpu_to_node(cpu);
        void *buffer, *insn_buff, *cea;
 
-       if (!x86_pmu.ds_pebs)
+       if (!intel_pmu_has_pebs())
                return 0;
 
        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
        if (unlikely(!buffer))
                return -ENOMEM;
 
+       if (x86_pmu.arch_pebs) {
+               hwev->pebs_vaddr = buffer;
+               return 0;
+       }
+
        /*
         * HSW+ already provides us the eventing ip; no need to allocate this
         * buffer then.
@@ -644,7 +649,7 @@ static int alloc_pebs_buffer(int cpu)
                }
                per_cpu(insn_buffer, cpu) = insn_buff;
        }
-       hwev->ds_pebs_vaddr = buffer;
+       hwev->pebs_vaddr = buffer;
        /* Update the cpu entry area mapping */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
        ds->pebs_buffer_base = (unsigned long) cea;
@@ -660,17 +665,20 @@ static void release_pebs_buffer(int cpu)
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        void *cea;
 
-       if (!x86_pmu.ds_pebs)
+       if (!intel_pmu_has_pebs())
                return;
 
-       kfree(per_cpu(insn_buffer, cpu));
-       per_cpu(insn_buffer, cpu) = NULL;
+       if (x86_pmu.ds_pebs) {
+               kfree(per_cpu(insn_buffer, cpu));
+               per_cpu(insn_buffer, cpu) = NULL;
 
-       /* Clear the fixmap */
-       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
-       ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
-       dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
-       hwev->ds_pebs_vaddr = NULL;
+               /* Clear the fixmap */
+               cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+               ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
+       }
+
+       dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
+       hwev->pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
@@ -823,6 +831,56 @@ void reserve_ds_buffers(void)
        }
 }
 
+inline int alloc_arch_pebs_buf_on_cpu(int cpu)
+{
+       if (!x86_pmu.arch_pebs)
+               return 0;
+
+       return alloc_pebs_buffer(cpu);
+}
+
+inline void release_arch_pebs_buf_on_cpu(int cpu)
+{
+       if (!x86_pmu.arch_pebs)
+               return;
+
+       release_pebs_buffer(cpu);
+}
+
+void init_arch_pebs_on_cpu(int cpu)
+{
+       struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+       u64 arch_pebs_base;
+
+       if (!x86_pmu.arch_pebs)
+               return;
+
+       if (!cpuc->pebs_vaddr) {
+               WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
+               x86_pmu.pebs_active = 0;
+               return;
+       }
+
+       /*
+        * 4KB-aligned pointer of the output buffer
+        * (__alloc_pages_node() return page aligned address)
+        * Buffer Size = 4KB * 2^SIZE
+        * contiguous physical buffer (__alloc_pages_node() with order)
+        */
+       arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
+       wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
+                    (u32)(arch_pebs_base >> 32));
+       x86_pmu.pebs_active = 1;
+}
+
+inline void fini_arch_pebs_on_cpu(int cpu)
+{
+       if (!x86_pmu.arch_pebs)
+               return;
+
+       wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
+}
+
 /*
  * BTS
  */
@@ -2883,8 +2941,8 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
                return;
        }
 
-       base = cpuc->ds_pebs_vaddr;
-       top = (void *)((u64)cpuc->ds_pebs_vaddr +
+       base = cpuc->pebs_vaddr;
+       top = (void *)((u64)cpuc->pebs_vaddr +
                       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));
 
        index.wr = 0;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ca5289980b5274a67e3f469233013919fd303cfc..13f411bca6bc55fb44561fed7972a8b71e2b52af 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -283,8 +283,9 @@ struct cpu_hw_events {
         * Intel DebugStore bits
         */
        struct debug_store      *ds;
-       void                    *ds_pebs_vaddr;
        void                    *ds_bts_vaddr;
+       /* DS based PEBS or arch-PEBS buffer address */
+       void                    *pebs_vaddr;
        u64                     pebs_enabled;
        int                     n_pebs;
        int                     n_large_pebs;
@@ -1617,6 +1618,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
 
 int intel_pmu_init(void);
 
+int alloc_arch_pebs_buf_on_cpu(int cpu);
+
+void release_arch_pebs_buf_on_cpu(int cpu);
+
+void init_arch_pebs_on_cpu(int cpu);
+
+void fini_arch_pebs_on_cpu(int cpu);
+
 void init_debug_store_on_cpu(int cpu);
 
 void fini_debug_store_on_cpu(int cpu);
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 5dbeac48a5b932e4471d23b3642ef58317e44b2c..023c2883f9f3e9ebc4b9fd9a539d95575bb4d026 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -4,7 +4,8 @@
 #include <linux/percpu-defs.h>
 
 #define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT      4
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << PEBS_BUFFER_SHIFT)
 
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4   8