From a5321aec6412b20b5ad15db2d6b916c05349dbff Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Wed, 28 Feb 2018 11:28:46 +0100
Subject: x86/microcode: Synchronize late microcode loading

From: Ashok Raj <ashok.raj@intel.com>

commit a5321aec6412b20b5ad15db2d6b916c05349dbff upstream.

Original idea by Ashok, completely rewritten by Borislav.

Before you read any further: the early loading method is still the
preferred one and you should always use it. This patch improves the
late loading mechanism for long-running jobs and cloud use cases.

Gather all cores and serialize the microcode update on them by doing it
one-by-one to make the late update process as reliable as possible and
avoid potential issues caused by the microcode update.
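
The scheme is a two-phase rendezvous: every CPU counts an atomic down
to zero before any update is attempted, the update itself runs one CPU
at a time under a spinlock, and every CPU then counts the atomic back
up so nobody leaves until all updates are done. As a rough illustration
only (this is not kernel code), the userspace sketch below models the
same dance with pthreads and C11 atomics; NTHREADS, do_update() and the
timeout value are invented for the example:

  /*
   * Userspace model of the two-phase rendezvous in __reload_late().
   * pthreads and C11 atomics stand in for stop_machine(), the update
   * spinlock and the per-CPU contexts; do_update() is a placeholder.
   * Build with: cc -std=c11 -pthread rendezvous.c
   */
  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdio.h>
  #include <time.h>

  #define NTHREADS 4                /* stands in for num_online_cpus() */
  #define SPINUNIT 100              /* 100 nsec, as in the patch */
  #define TIMEOUT_NS 1000000000ULL  /* NSEC_PER_SEC: give up after ~1 sec */

  static atomic_int late_cpus = NTHREADS;
  static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

  /* Placeholder for apply_microcode_local(). */
  static void do_update(long cpu)
  {
      printf("thread %ld: updating\n", cpu);
  }

  static void *reload_late(void *arg)
  {
      unsigned long long timeout = TIMEOUT_NS;
      struct timespec unit = { .tv_nsec = SPINUNIT };
      long cpu = (long)arg;

      /* Phase 1: wait until every thread has arrived. */
      atomic_fetch_sub(&late_cpus, 1);
      while (atomic_load(&late_cpus)) {
          if (timeout < SPINUNIT) {
              fprintf(stderr, "thread %ld: rendezvous timeout\n", cpu);
              return NULL;
          }
          nanosleep(&unit, NULL);
          timeout -= SPINUNIT;
      }

      /* The update is serialized: one thread at a time takes the lock. */
      pthread_mutex_lock(&update_lock);
      do_update(cpu);
      pthread_mutex_unlock(&update_lock);

      /* Phase 2: count back up; spin until everyone has finished. */
      atomic_fetch_add(&late_cpus, 1);
      while (atomic_load(&late_cpus) != NTHREADS)
          ;
      return NULL;
  }

  int main(void)
  {
      pthread_t t[NTHREADS];

      for (long i = 0; i < NTHREADS; i++)
          pthread_create(&t[i], NULL, reload_late, (void *)i);
      for (int i = 0; i < NTHREADS; i++)
          pthread_join(t[i], NULL);
      return 0;
  }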

[ Borislav: Rewrite completely. ]

Co-developed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Tested-by: Ashok Raj <ashok.raj@intel.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Link: https://lkml.kernel.org/r/20180228102846.13447-8-bp@alien8.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++--------
 1 file changed, 92 insertions(+), 26 deletions(-)

--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
 #define pr_fmt(fmt) "microcode: " fmt
 
 #include <linux/platform_device.h>
+#include <linux/stop_machine.h>
 #include <linux/syscore_ops.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(vo
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ *   is loading microcode in order to avoid any negative interactions caused by
+ *   the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ *   requirement can be relaxed in the future. Right now, this is conservative
+ *   and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
 static int check_online_cpus(void)
 {
 	if (num_online_cpus() == num_present_cpus())
@@ -496,23 +517,85 @@ static int check_online_cpus(void)
 	return -EINVAL;
 }
 
-static enum ucode_state reload_for_cpu(int cpu)
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ *   0 - no update done
+ *   1 - microcode was updated
+ */
+static int __reload_late(void *info)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	unsigned int timeout = NSEC_PER_SEC;
+	int all_cpus = num_online_cpus();
+	int cpu = smp_processor_id();
+	enum ucode_state err;
+	int ret = 0;
+
+	atomic_dec(&late_cpus);
+
+	/*
+	 * Wait for all CPUs to arrive. A load will not be attempted unless all
+	 * CPUs show up.
+	 * */
+	while (atomic_read(&late_cpus)) {
+		if (timeout < SPINUNIT) {
+			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+			       atomic_read(&late_cpus));
+			return -1;
+		}
+
+		ndelay(SPINUNIT);
+		timeout -= SPINUNIT;
+
+		touch_nmi_watchdog();
+	}
+
+	spin_lock(&update_lock);
+	apply_microcode_local(&err);
+	spin_unlock(&update_lock);
+
+	if (err > UCODE_NFOUND) {
+		pr_warn("Error reloading microcode on CPU %d\n", cpu);
+		ret = -1;
+	} else if (err == UCODE_UPDATED) {
+		ret = 1;
+	}
 
-	if (!uci->valid)
-		return UCODE_OK;
+	atomic_inc(&late_cpus);
 
-	return apply_microcode_on_target(cpu);
+	while (atomic_read(&late_cpus) != all_cpus)
+		cpu_relax();
+
+	return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+	int ret;
+
+	atomic_set(&late_cpus, num_online_cpus());
+
+	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+	if (ret < 0)
+		return ret;
+	else if (ret > 0)
+		microcode_check();
+
+	return ret;
 }
 
 static ssize_t reload_store(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t size)
 {
-	int cpu, bsp = boot_cpu_data.cpu_index;
 	enum ucode_state tmp_ret = UCODE_OK;
-	bool do_callback = false;
+	int bsp = boot_cpu_data.cpu_index;
 	unsigned long val;
 	ssize_t ret = 0;
 
@@ -534,30 +617,13 @@ static ssize_t reload_store(struct devic
 		goto put;
 
 	mutex_lock(&microcode_mutex);
-
-	for_each_online_cpu(cpu) {
-		tmp_ret = reload_for_cpu(cpu);
-		if (tmp_ret > UCODE_NFOUND) {
-			pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
-			/* set retval for the first encountered reload error */
-			if (!ret)
-				ret = -EINVAL;
-		}
-
-		if (tmp_ret == UCODE_UPDATED)
-			do_callback = true;
-	}
-
-	if (!ret && do_callback)
-		microcode_check();
-
+	ret = microcode_reload_late();
 	mutex_unlock(&microcode_mutex);
 
 put:
 	put_online_cpus();
 
-	if (!ret)
+	if (ret >= 0)
 		ret = size;
 
 	return ret;
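
A note on the rendezvous budget in __reload_late() above: each CPU
spins in SPINUNIT (100 ns) steps against a NSEC_PER_SEC budget, i.e.
at most 10^7 iterations, so the load is abandoned if the CPUs fail to
gather within roughly one second.

For completeness, the late load path patched above is driven entirely
from userspace: writing "1" to the microcode reload attribute ends up
in reload_store(), which now funnels into microcode_reload_late(). The
small sketch below (assuming the usual sysfs location of that
attribute; root required) is just the programmatic equivalent of
"echo 1 > /sys/devices/system/cpu/microcode/reload":

  /* Trigger a late microcode reload via sysfs. Build: cc reload.c */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
      int fd = open("/sys/devices/system/cpu/microcode/reload", O_WRONLY);

      if (fd < 0) {
          perror("open");
          return 1;
      }
      /* reload_store() accepts only the value 1. */
      if (write(fd, "1", 1) != 1) {
          perror("write");
          close(fd);
          return 1;
      }
      return close(fd) ? 1 : 0;
  }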