From 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Tue, 2 Apr 2019 15:21:18 +0000
Subject: x86/perf/amd: Remove need to check "running" bit in NMI handler

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 upstream.

Spurious interrupt support was added to perf in the following commit, almost
a decade ago:

  63e6be6d98e1 ("perf, x86: Catch spurious interrupts after disabling counters")

The two previous patches (resolving the race condition when disabling a
PMC and NMI latency mitigation) allow for the removal of this older
spurious interrupt support.

Currently in x86_pmu_stop(), the bit for the PMC in the active_mask bitmap
is cleared before disabling the PMC, which sets up a race condition. This
race condition was mitigated by introducing the running bitmap. That race
condition can be eliminated by first disabling the PMC, waiting for PMC
reset on overflow and then clearing the bit for the PMC in the active_mask
bitmap. The NMI handler will not re-enable a disabled counter.

If x86_pmu_stop() is called from the perf NMI handler, the NMI latency
mitigation support will guard against any unhandled NMI messages.

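As a rough, condensed sketch (not one of the literal hunks below; the names
are taken from the diff, and amd_pmu_wait_on_overflow() plus the NMI latency
mitigation come from the two earlier patches in this series), the ordering
change in x86_pmu_stop() amounts to:

	/*
	 * Old: the active_mask bit is cleared before the PMC is disabled,
	 * so an overflow NMI already in flight can find the bit clear and
	 * must be claimed via the extra cpuc->running bitmap.
	 */
	if (__test_and_clear_bit(hwc->idx, cpuc->active_mask))
		x86_pmu.disable(event);

	/*
	 * New: disable the PMC first (on AMD, amd_pmu_disable_event() also
	 * waits for the overflowed counter to be reset, unless running in
	 * NMI context), then clear the bit. A disabled counter is never
	 * re-enabled by the NMI handler, so cpuc->running and the spurious
	 * interrupt check can go away.
	 */
	if (test_bit(hwc->idx, cpuc->active_mask)) {
		x86_pmu.disable(event);
		__clear_bit(hwc->idx, cpuc->active_mask);
	}
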
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c |   21 +++++++++++++++++++--
 arch/x86/events/core.c     |   13 +++----------
 2 files changed, 22 insertions(+), 12 deletions(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -4,8 +4,8 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
-#include <linux/nmi.h>
 #include <asm/apicdef.h>
+#include <asm/nmi.h>
 
 #include "../perf_event.h"
 
@@ -491,6 +491,23 @@ static void amd_pmu_disable_all(void)
 	}
 }
 
+static void amd_pmu_disable_event(struct perf_event *event)
+{
+	x86_pmu_disable_event(event);
+
+	/*
+	 * This can be called from NMI context (via x86_pmu_stop). The counter
+	 * may have overflowed, but either way, we'll never see it get reset
+	 * by the NMI if we're already in the NMI. And the NMI latency support
+	 * below will take care of any pending NMI that might have been
+	 * generated by the overflow.
+	 */
+	if (in_nmi())
+		return;
+
+	amd_pmu_wait_on_overflow(event->hw.idx);
+}
+
 /*
  * Because of NMI latency, if multiple PMC counters are active or other sources
  * of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -738,7 +755,7 @@ static __initconst const struct x86_pmu
 	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
+	.disable		= amd_pmu_disable_event,
 	.hw_config		= amd_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1328,8 +1328,9 @@ void x86_pmu_stop(struct perf_event *eve
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+	if (test_bit(hwc->idx, cpuc->active_mask)) {
 		x86_pmu.disable(event);
+		__clear_bit(hwc->idx, cpuc->active_mask);
 		cpuc->events[hwc->idx] = NULL;
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 		hwc->state |= PERF_HES_STOPPED;
@@ -1426,16 +1427,8 @@ int x86_pmu_handle_irq(struct pt_regs *r
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		if (!test_bit(idx, cpuc->active_mask)) {
-			/*
-			 * Though we deactivated the counter some cpus
-			 * might still deliver spurious interrupts still
-			 * in flight. Catch them:
-			 */
-			if (__test_and_clear_bit(idx, cpuc->running))
-				handled++;
+		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		}
 
 		event = cpuc->events[idx];