]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf/x86/intel: Fix segfault with PEBS-via-PT with sample_freq
authorAdrian Hunter <adrian.hunter@intel.com>
Thu, 8 May 2025 13:44:52 +0000 (16:44 +0300)
committerIngo Molnar <mingo@kernel.org>
Thu, 15 May 2025 16:15:54 +0000 (18:15 +0200)
Currently, using PEBS-via-PT with a sample frequency instead of a sample
period, causes a segfault.  For example:

    BUG: kernel NULL pointer dereference, address: 0000000000000195
    <NMI>
    ? __die_body.cold+0x19/0x27
    ? page_fault_oops+0xca/0x290
    ? exc_page_fault+0x7e/0x1b0
    ? asm_exc_page_fault+0x26/0x30
    ? intel_pmu_pebs_event_update_no_drain+0x40/0x60
    ? intel_pmu_pebs_event_update_no_drain+0x32/0x60
    intel_pmu_drain_pebs_icl+0x333/0x350
    handle_pmi_common+0x272/0x3c0
    intel_pmu_handle_irq+0x10a/0x2e0
    perf_event_nmi_handler+0x2a/0x50

That happens because intel_pmu_pebs_event_update_no_drain() assumes all the
pebs_enabled bits represent counter indexes, which is not always the case.
In this particular case, bits 60 and 61 are set for PEBS-via-PT purposes.

The behaviour of PEBS-via-PT with sample frequency is questionable because
although a PMI is generated (PEBS_PMI_AFTER_EACH_RECORD), the period is not
adjusted anyway.

Putting that aside, fix intel_pmu_pebs_event_update_no_drain() by passing
the mask of counter bits instead of 'size'.  Note, prior to the Fixes
commit, 'size' would be limited to the maximum counter index, so the issue
was not hit.

Fixes: 722e42e45c2f1 ("perf/x86: Support counter mask")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Link: https://lore.kernel.org/r/20250508134452.73960-1-adrian.hunter@intel.com
arch/x86/events/intel/ds.c

index 9b20acc0e932e20770e5925c7314f1ef88bc28b8..8d86e91bd5e5c3d9e75bb63e487c7edf56116e19 100644 (file)
@@ -2465,8 +2465,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
                                setup_pebs_fixed_sample_data);
 }
 
-static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
 {
+       u64 pebs_enabled = cpuc->pebs_enabled & mask;
        struct perf_event *event;
        int bit;
 
@@ -2477,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int
         * It needs to call intel_pmu_save_and_restart_reload() to
         * update the event->count for this case.
         */
-       for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+       for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
                event = cpuc->events[bit];
                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                        intel_pmu_save_and_restart_reload(event, 0);
@@ -2512,7 +2513,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
        }
 
        if (unlikely(base >= top)) {
-               intel_pmu_pebs_event_update_no_drain(cpuc, size);
+               intel_pmu_pebs_event_update_no_drain(cpuc, mask);
                return;
        }
 
@@ -2626,7 +2627,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
               (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
 
        if (unlikely(base >= top)) {
-               intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+               intel_pmu_pebs_event_update_no_drain(cpuc, mask);
                return;
        }