From 6d3edaae16c6c7d238360f2841212c2b26774d5e Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Tue, 2 Apr 2019 15:21:16 +0000
Subject: x86/perf/amd: Resolve NMI latency issues for active PMCs

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 6d3edaae16c6c7d238360f2841212c2b26774d5e upstream.

On AMD processors, the detection of an overflowed PMC counter in the NMI
handler relies on the current value of the PMC. So, for example, to check
for overflow on a 48-bit counter, bit 47 is checked to see if it is 1 (not
overflowed) or 0 (overflowed).

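As a rough illustration of that check (a sketch only, not part of this
patch; the helper name is made up):

    /* Bit 47 is the top bit of a 48-bit PMC: 1 = not overflowed, 0 = overflowed. */
    static inline int pmc_has_overflowed(unsigned long long pmc_value)
    {
            return !(pmc_value & (1ULL << 47));
    }
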
When the perf NMI handler executes it does not know in advance which PMC
counters have overflowed. As such, the NMI handler will process all active
PMC counters that have overflowed. NMI latency in newer AMD processors can
result in multiple overflowed PMC counters being processed in one NMI and
then a subsequent NMI, which does not appear to be a back-to-back NMI,
finding no PMC counters that have overflowed. This may appear to be an
unhandled NMI, resulting in either a panic or a series of messages,
depending on how the kernel was configured.

To mitigate this issue, add an AMD handle_irq callback function,
amd_pmu_handle_irq(), that invokes the common x86_pmu_handle_irq()
function and, upon return, performs some additional processing to
indicate whether the NMI has been handled or would have been handled had
an earlier NMI not handled the overflowed PMC. Whenever a PMC is active,
a per-CPU variable is set to the minimum of the number of active PMCs
and 2; this indicates the number of NMIs that can still plausibly occur.
The value of 2 covers the case where an NMI does not arrive at the LAPIC
in time to be collapsed into an already pending NMI. Each time the
handler is called without having handled an overflowed counter, the
per-CPU value is checked: if it is non-zero, it is decremented and the
handler reports that it handled the NMI; if it is zero, the handler
reports that it did not handle the NMI.

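As a rough standalone sketch of that accounting (simplified from the
actual handler added by the diff below; the plain variable and function
name here are made up and stand in for the real per-CPU perf_nmi_counter
and handler):

    /* Simplified model of the NMI accounting described above. */
    static unsigned int nmi_budget;    /* stands in for per-CPU perf_nmi_counter */

    static int nmi_accounting(int handled, int active)
    {
            if (handled) {
                    /* Remember how many latent PMC NMIs may still arrive. */
                    nmi_budget = active < 2 ? active : 2;
                    return handled;
            }
            if (!nmi_budget)
                    return 0;          /* i.e. NMI_DONE: not our NMI */
            nmi_budget--;              /* absorb one latent PMC NMI */
            return 1;                  /* i.e. NMI_HANDLED */
    }
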
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c |   56 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -4,10 +4,13 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/nmi.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
 
+static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+
 static __initconst const u64 amd_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -488,6 +491,57 @@ static void amd_pmu_disable_all(void)
         }
 }
 
+/*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
+ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
+ * back-to-back NMI support won't be active. This PMC handler needs to take into
+ * account that this can occur, otherwise this could result in unknown NMI
+ * messages being issued. Examples of this is PMC overflow while in the NMI
+ * handler when multiple PMCs are active or PMC overflow while handling some
+ * other source of an NMI.
+ *
+ * Attempt to mitigate this by using the number of active PMCs to determine
+ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
+ * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
+ * number of active PMCs or 2. The value of 2 is used in case an NMI does not
+ * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ */
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        int active, handled;
+
+        /*
+         * Obtain the active count before calling x86_pmu_handle_irq() since
+         * it is possible that x86_pmu_handle_irq() may make a counter
+         * inactive (through x86_pmu_stop).
+         */
+        active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+
+        /* Process any counter overflows */
+        handled = x86_pmu_handle_irq(regs);
+
+        /*
+         * If a counter was handled, record the number of possible remaining
+         * NMIs that can occur.
+         */
+        if (handled) {
+                this_cpu_write(perf_nmi_counter,
+                               min_t(unsigned int, 2, active));
+
+                return handled;
+        }
+
+        if (!this_cpu_read(perf_nmi_counter))
+                return NMI_DONE;
+
+        this_cpu_dec(perf_nmi_counter);
+
+        return NMI_HANDLED;
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                           struct perf_event *event)
@@ -680,7 +734,7 @@ static ssize_t amd_event_sysfs_show(char
 
 static __initconst const struct x86_pmu amd_pmu = {
         .name = "AMD",
-        .handle_irq = x86_pmu_handle_irq,
+        .handle_irq = amd_pmu_handle_irq,
         .disable_all = amd_pmu_disable_all,
         .enable_all = x86_pmu_enable_all,
         .enable = x86_pmu_enable_event,