From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Tue, 2 Apr 2019 15:21:14 +0000
Subject: x86/perf/amd: Resolve race condition when disabling PMC

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 914123fa39042e651d79eaf86bbf63a1b938dddf upstream.

On AMD processors, the detection of an overflowed counter in the NMI
handler relies on the current value of the counter. So, for example, to
check for overflow on a 48 bit counter, bit 47 is checked to see if it
is 1 (not overflowed) or 0 (overflowed).

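As a rough illustration (not kernel code; the helper name pmc_overflowed
is made up here, and 48 matches x86_pmu.cntval_bits on these processors),
the check the NMI handler relies on boils down to testing the top
implemented bit:

#include <stdbool.h>
#include <stdint.h>

/* Illustration only: with a 48-bit PMC, bit 47 behaves like a sign bit.
 * A counter armed with (-period) starts with bit 47 set; once it counts
 * up past zero and wraps, bit 47 reads back as 0. */
static bool pmc_overflowed(uint64_t counter, unsigned int cntval_bits)
{
	return !(counter & (1ULL << (cntval_bits - 1)));
}

/* Example: pmc_overflowed(0xFFFFFFFFF000ULL, 48) -> false (bit 47 set)
 *          pmc_overflowed(0x000000000003ULL, 48) -> true  (wrapped)    */
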
There is currently a race condition present when disabling and then
updating the PMC. Increased NMI latency in newer AMD processors makes this
race condition more pronounced. If the counter value has overflowed, it is
possible to update the PMC value before the NMI handler can run. The
updated PMC value is not an overflowed value, so when the perf NMI handler
does run, it will not find an overflowed counter. This may appear as an
unknown NMI resulting in either a panic or a series of messages, depending
on how the kernel is configured.

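To make the window concrete, here is a purely illustrative userspace
sketch of the interleaving (made-up period of 0x1000 events; none of this
is kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pmc;

	pmc = (1ULL << 48) - 0x1000;	/* counter armed: bit 47 set ("not overflowed") */
	pmc = 0x3;			/* counter wraps; NMI raised but delayed        */
	pmc = (1ULL << 48) - 0x1000;	/* PMC disabled and rewritten before the NMI    */

	/* The late NMI handler now sees bit 47 set and finds nothing to handle. */
	printf("looks overflowed: %s\n",
	       (pmc & (1ULL << 47)) ? "no" : "yes");
	return 0;
}
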
To eliminate this race condition, the PMC value must be checked after
disabling the counter. Add an AMD function, amd_pmu_disable_all(), that
will wait for the NMI handler to reset any active and overflowed counter
after calling x86_pmu_disable_all().

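For context, x86_pmu_disable_all() is the generic helper that clears the
enable bit in each active counter's event-select MSR; paraphrased from
arch/x86/events/core.c of this era (details can differ between kernel
versions), it looks roughly like:

void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/* Clear the enable bit so the counter stops counting */
		rdmsrl(x86_pmu_config_addr(idx), val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
	}
}
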
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c | 65 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 3 deletions(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
@@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT	50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+	unsigned int i;
+	u64 counter;
+
+	/*
+	 * Wait for the counter to be reset if it has overflowed. This loop
+	 * should exit very, very quickly, but just in case, don't wait
+	 * forever...
+	 */
+	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+		rdmsrl(x86_pmu_event_addr(idx), counter);
+		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+			break;
+
+		/* Might be in IRQ context, so can't sleep */
+		udelay(1);
+	}
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx;
+
+	x86_pmu_disable_all();
+
+	/*
+	 * This shouldn't be called from NMI context, but add a safeguard here
+	 * to return, since if we're in NMI context we can't wait for an NMI
+	 * to reset an overflowed counter value.
+	 */
+	if (in_nmi())
+		return;
+
+	/*
+	 * Check each counter for overflow and wait for it to be reset by the
+	 * NMI if it has overflowed. This relies on the fact that all active
+	 * counters are always enabled when this function is called and
+	 * ARCH_PERFMON_EVENTSEL_INT is always set.
+	 */
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		amd_pmu_wait_on_overflow(idx);
+	}
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
@@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
+	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
@@ -728,7 +787,7 @@ void amd_pmu_enable_virt(void)
 	cpuc->perf_ctr_virt_mask = 0;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -746,7 +805,7 @@ void amd_pmu_disable_virt(void)
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
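
As a usage note, the ->disable_all() callback replaced above is reached
from the generic x86 PMU code whenever perf disables the PMU on a CPU;
roughly paraphrased (simplified, and not part of this patch):

static void x86_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu_initialized() || !cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();	/* amd_pmu_disable_all() on AMD after this patch */
}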