]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
perf/amd/ibs: Avoid race between event add and NMI
authorRavi Bangoria <ravi.bangoria@amd.com>
Mon, 16 Feb 2026 04:22:16 +0000 (04:22 +0000)
committerPeter Zijlstra <peterz@infradead.org>
Fri, 27 Feb 2026 15:40:23 +0000 (16:40 +0100)
Consider the following race:

  --------
  o OP_CTL contains stale value: OP_CTL[Val]=1, OP_CTL[En]=0
  o A new IBS OP event is being added
  o [P]: Process context, [N]: NMI context

  [P] perf_ibs_add(event) {
  [P]     if (test_and_set_bit(IBS_ENABLED, pcpu->state))
  [P]         return;
  [P]     /* pcpu->state = IBS_ENABLED */
  [P]
  [P]     pcpu->event = event;
  [P]
  [P]     perf_ibs_start(event) {
  [P]         set_bit(IBS_STARTED, pcpu->state);
  [P]         /* pcpu->state = IBS_ENABLED | IBS_STARTED */
  [P]         clear_bit(IBS_STOPPING, pcpu->state);
  [P]         /* pcpu->state = IBS_ENABLED | IBS_STARTED */

  [N] --> NMI due to genuine FETCH event. perf_ibs_handle_irq()
  [N]     called for OP PMU as well.
  [N]
  [N] perf_ibs_handle_irq(perf_ibs) {
  [N]     event = pcpu->event; /* See line 6 */
  [N]
  [N]     if (!test_bit(IBS_STARTED, pcpu->state)) /* false */
  [N]         return 0;
  [N]
  [N]     if (WARN_ON_ONCE(!event)) /* false */
  [N]         goto fail;
  [N]
  [N]     if (!(*buf++ & perf_ibs->valid_mask)) /* false due to stale
  [N]                                            * IBS_OP_CTL value */
  [N]         goto fail;
  [N]
  [N]         ...
  [N]
  [N]     perf_ibs_enable_event() // *Accidentally* enable the event.
  [N] }
  [N]
  [N] /*
  [N]  * Repeated NMIs may follow due to accidentally enabled IBS OP
  [N]  * event if the sample period is very low. It could also lead
  [N]  * to pcpu->state corruption if the event gets throttled due
  [N]  * to too frequent NMIs.
  [N]  */

  [P]         perf_ibs_enable_event();
  [P]     }
  [P] }
  --------

We cannot safely clear IBS_{FETCH|OP}_CTL while disabling the event,
because the register might be read again later. So, clear the register
in the enable path - before we update pcpu->state and enable the event.
This guarantees that any NMI that lands in the gap finds Val=0 and
bails out cleanly.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://patch.msgid.link/20260216042216.1440-6-ravi.bangoria@amd.com
arch/x86/events/amd/ibs.c

index 004226b52ac7986e8fade13bdc4f020305b144ac..32e6456cb5e581b77f47920297b05934ecb31266 100644 (file)
@@ -493,6 +493,14 @@ static void perf_ibs_start(struct perf_event *event, int flags)
        }
        config |= period >> 4;
 
+       /*
+        * Reset the IBS_{FETCH|OP}_CTL MSR before updating pcpu->state.
+        * Doing so prevents a race condition in which an NMI due to other
+        * source might accidentally activate the event before we enable
+        * it ourselves.
+        */
+       perf_ibs_disable_event(perf_ibs, hwc, 0);
+
        /*
         * Set STARTED before enabling the hardware, such that a subsequent NMI
         * must observe it.