// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * CPU Microcode Update Driver for Linux
 *
 * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 *               2006      Shaohua Li <shaohua.li@intel.com>
 *               2013-2016 Borislav Petkov <bp@alien8.de>
 *
 * X86 CPU microcode early update for Linux:
 *
 *      Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 *                         H Peter Anvin <hpa@zytor.com>
 *                (C) 2015 Borislav Petkov <bp@alien8.de>
 *
 * This driver allows upgrading microcode on x86 processors.
 */

#define pr_fmt(fmt) "microcode: " fmt

#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/nmi.h>

#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/cmdline.h>
#include <asm/setup.h>

#include "internal.h"

static struct microcode_ops *microcode_ops;
bool dis_ucode_ldr = true;

bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
module_param(force_minrev, bool, S_IRUSR | S_IWUSR);
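
/*
 * force_minrev (above) controls whether late loading enforces the minimal
 * required base revision that newer microcode blobs carry in their metadata;
 * the vendor specific loaders (not part of this file) consult it when
 * deciding whether a late load is considered safe. Being a module parameter
 * it can also be flipped at runtime by root, typically via
 * /sys/module/microcode/parameters/force_minrev.
 */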

/*
 * All non cpu-hotplug-callback call sites use:
 *
 * - cpus_read_lock/unlock() to synchronize with
 *   the cpu-hotplug-callback call sites.
 *
 * We guarantee that only a single CPU is being
 * updated at any particular moment in time.
 */
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
        struct cpu_signature    *cpu_sig;
        int                     err;
};

/*
 * Those patch levels cannot be updated to newer ones and thus should be final.
 */
static u32 final_levels[] = {
        0x01000098,
        0x0100009f,
        0x010000af,
        0, /* T-101 terminator */
};

/*
 * Check the current patch level on this CPU.
 *
 * Returns:
 *  - true: if update should stop
 *  - false: otherwise
 */
static bool amd_check_current_patch_level(void)
{
        u32 lvl, dummy, i;
        u32 *levels;

        native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);

        levels = final_levels;

        for (i = 0; levels[i]; i++) {
                if (lvl == levels[i])
                        return true;
        }
        return false;
}

static bool __init check_loader_disabled_bsp(void)
{
        static const char *__dis_opt_str = "dis_ucode_ldr";
        const char *cmdline = boot_command_line;
        const char *option  = __dis_opt_str;

        /*
         * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
         * completely accurate as xen pv guests don't see that CPUID bit set,
         * but that's good enough as they don't land on the BSP path anyway.
         */
        if (native_cpuid_ecx(1) & BIT(31))
                return true;

        if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
                if (amd_check_current_patch_level())
                        return true;
        }

        if (cmdline_find_option_bool(cmdline, option) <= 0)
                dis_ucode_ldr = false;

        return dis_ucode_ldr;
}
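
/*
 * The loader can be disabled on the kernel command line with the
 * "dis_ucode_ldr" option parsed above. Running as a guest (CPUID(1).ECX[31]
 * set) disables it as well, because microcode updates are the responsibility
 * of the hypervisor/host in that case.
 */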

void __init load_ucode_bsp(void)
{
        unsigned int cpuid_1_eax;
        bool intel = true;

        if (!have_cpuid_p())
                return;

        cpuid_1_eax = native_cpuid_eax(1);

        switch (x86_cpuid_vendor()) {
        case X86_VENDOR_INTEL:
                if (x86_family(cpuid_1_eax) < 6)
                        return;
                break;

        case X86_VENDOR_AMD:
                if (x86_family(cpuid_1_eax) < 0x10)
                        return;
                intel = false;
                break;

        default:
                return;
        }

        if (check_loader_disabled_bsp())
                return;

        if (intel)
                load_ucode_intel_bsp();
        else
                load_ucode_amd_bsp(cpuid_1_eax);
}
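
/*
 * Note that the early BSP/AP paths query vendor and family directly via the
 * native_cpuid*() helpers rather than relying on boot_cpu_data, since they
 * can run before the regular CPU identification code has populated it.
 */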

void load_ucode_ap(void)
{
        unsigned int cpuid_1_eax;

        if (dis_ucode_ldr)
                return;

        cpuid_1_eax = native_cpuid_eax(1);

        switch (x86_cpuid_vendor()) {
        case X86_VENDOR_INTEL:
                if (x86_family(cpuid_1_eax) >= 6)
                        load_ucode_intel_ap();
                break;
        case X86_VENDOR_AMD:
                if (x86_family(cpuid_1_eax) >= 0x10)
                        load_ucode_amd_ap(cpuid_1_eax);
                break;
        default:
                break;
        }
}
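
/*
 * load_ucode_ap() is invoked on every application processor during bringup,
 * including CPUs that are onlined again after having been offlined, so each
 * AP picks up the same microcode revision as the boot CPU without further
 * coordination.
 */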

struct cpio_data __init find_microcode_in_initrd(const char *path)
{
#ifdef CONFIG_BLK_DEV_INITRD
        unsigned long start = 0;
        size_t size;

#ifdef CONFIG_X86_32
        size = boot_params.hdr.ramdisk_size;
        /* Early load on BSP has a temporary mapping. */
        if (size)
                start = initrd_start_early;

#else /* CONFIG_X86_64 */
        size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
        size |= boot_params.hdr.ramdisk_size;

        if (size) {
                start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
                start |= boot_params.hdr.ramdisk_image;
                start += PAGE_OFFSET;
        }
#endif

        /*
         * Fixup the start address: after reserve_initrd() runs, initrd_start
         * has the virtual address of the beginning of the initrd. It also
         * possibly relocates the ramdisk. In either case, initrd_start contains
         * the updated address so use that instead.
         */
        if (initrd_start)
                start = initrd_start;

        return find_cpio_data(path, (void *)start, size, NULL);
#else /* !CONFIG_BLK_DEV_INITRD */
        return (struct cpio_data){ NULL, 0, "" };
#endif
}
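
/*
 * The vendor drivers call this with cpio paths such as
 * "kernel/x86/microcode/GenuineIntel.bin" or
 * "kernel/x86/microcode/AuthenticAMD.bin"; the microcode blob has to sit in
 * the uncompressed early part of the initramfs for find_cpio_data() to see it.
 */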

static void reload_early_microcode(unsigned int cpu)
{
        int vendor, family;

        vendor = x86_cpuid_vendor();
        family = x86_cpuid_family();

        switch (vendor) {
        case X86_VENDOR_INTEL:
                if (family >= 6)
                        reload_ucode_intel();
                break;
        case X86_VENDOR_AMD:
                if (family >= 0x10)
                        reload_ucode_amd(cpu);
                break;
        default:
                break;
        }
}

/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

#ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why the heavy-handed stomp_machine effort?
 *
 * - HT siblings must be idle and not execute other code while the other sibling
 *   is loading microcode in order to avoid any negative interactions caused by
 *   the loading.
 *
 * - In addition, microcode update on the cores must be serialized until this
 *   requirement can be relaxed in the future. Right now, this is conservative
 *   and good.
 */
enum sibling_ctrl {
        /* Spinwait with timeout */
        SCTRL_WAIT,
        /* Invoke the microcode_apply() callback */
        SCTRL_APPLY,
        /* Proceed without invoking the microcode_apply() callback */
        SCTRL_DONE,
};

struct microcode_ctrl {
        enum sibling_ctrl       ctrl;
        enum ucode_state        result;
        unsigned int            ctrl_cpu;
        bool                    nmi_enabled;
};

DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
static atomic_t late_cpus_in, offline_in_nmi;
static unsigned int loops_per_usec;
static cpumask_t cpu_offline_mask;

static noinstr bool wait_for_cpus(atomic_t *cnt)
{
        unsigned int timeout, loops;

        WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);

        for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
                if (!raw_atomic_read(cnt))
                        return true;

                for (loops = 0; loops < loops_per_usec; loops++)
                        cpu_relax();

                /* If invoked directly, tickle the NMI watchdog */
                if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
                        instrumentation_begin();
                        touch_nmi_watchdog();
                        instrumentation_end();
                }
        }
        /* Prevent the late comers from making progress and let them time out */
        raw_atomic_inc(cnt);
        return false;
}

static noinstr bool wait_for_ctrl(void)
{
        unsigned int timeout, loops;

        for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
                if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
                        return true;

                for (loops = 0; loops < loops_per_usec; loops++)
                        cpu_relax();

                /* If invoked directly, tickle the NMI watchdog */
                if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
                        instrumentation_begin();
                        touch_nmi_watchdog();
                        instrumentation_end();
                }
        }
        return false;
}
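
/*
 * Both wait loops above spin for at most roughly one second. loops_per_usec
 * is derived from loops_per_jiffy in load_late_stop_cpus() below, so the
 * inner delay loop approximates one microsecond per outer iteration without
 * calling into instrumentable helpers such as udelay().
 */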

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
{
        /* Initial rendezvous to ensure that all CPUs have arrived */
        if (!wait_for_cpus(&late_cpus_in)) {
                raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
                return false;
        }

        /*
         * Wait for primary threads to complete. If one of them hangs due
         * to the update, there is no way out. This is non-recoverable
         * because the CPU might hold locks or resources and confuse the
         * scheduler, watchdogs etc. There is no way to safely evacuate the
         * machine.
         */
        if (wait_for_ctrl())
                return true;

        instrumentation_begin();
        panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
        instrumentation_end();
}

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr void load_secondary(unsigned int cpu)
{
        unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
        enum ucode_state ret;

        if (!load_secondary_wait(ctrl_cpu)) {
                instrumentation_begin();
                pr_err_once("load: %d CPUs timed out\n",
                            atomic_read(&late_cpus_in) - 1);
                instrumentation_end();
                return;
        }

        /* Primary thread completed. Allow invoking instrumentable code */
        instrumentation_begin();
        /*
         * If the primary succeeded then invoke the apply() callback,
         * otherwise copy the state from the primary thread.
         */
        if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
                ret = microcode_ops->apply_microcode(cpu);
        else
                ret = per_cpu(ucode_ctrl.result, ctrl_cpu);

        this_cpu_write(ucode_ctrl.result, ret);
        this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
        instrumentation_end();
}

static void __load_primary(unsigned int cpu)
{
        struct cpumask *secondaries = topology_sibling_cpumask(cpu);
        enum sibling_ctrl ctrl;
        enum ucode_state ret;
        unsigned int sibling;

        /* Initial rendezvous to ensure that all CPUs have arrived */
        if (!wait_for_cpus(&late_cpus_in)) {
                this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
                pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
                return;
        }

        ret = microcode_ops->apply_microcode(cpu);
        this_cpu_write(ucode_ctrl.result, ret);
        this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);

        /*
         * If the update was successful, let the siblings run the apply()
         * callback. If not, tell them it's done. This also covers the
         * case where the CPU has uniform loading at package or system
         * scope implemented but does not advertise it.
         */
        if (ret == UCODE_UPDATED || ret == UCODE_OK)
                ctrl = SCTRL_APPLY;
        else
                ctrl = SCTRL_DONE;

        for_each_cpu(sibling, secondaries) {
                if (sibling != cpu)
                        per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
        }
}

static bool kick_offline_cpus(unsigned int nr_offl)
{
        unsigned int cpu, timeout;

        for_each_cpu(cpu, &cpu_offline_mask) {
                /* Enable the rendezvous handler and send NMI */
                per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
                apic_send_nmi_to_offline_cpu(cpu);
        }

        /* Wait for them to arrive */
        for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
                if (atomic_read(&offline_in_nmi) == nr_offl)
                        return true;
                udelay(1);
        }
        /* Let the others time out */
        return false;
}

static void release_offline_cpus(void)
{
        unsigned int cpu;

        for_each_cpu(cpu, &cpu_offline_mask)
                per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
}

static void load_primary(unsigned int cpu)
{
        unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
        bool proceed = true;

        /* Kick soft-offlined SMT siblings if required */
        if (!cpu && nr_offl)
                proceed = kick_offline_cpus(nr_offl);

        /* If the soft-offlined CPUs did not respond, abort */
        if (proceed)
                __load_primary(cpu);

        /* Unconditionally release soft-offlined SMT siblings if required */
        if (!cpu && nr_offl)
                release_offline_cpus();
}
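
/*
 * Summary of the rendezvous: every online CPU ends up in
 * microcode_update_handler() below, either via stop_machine() or via the NMI
 * raised from load_cpus_stopped(). The first sibling of each core acts as the
 * control thread and runs load_primary(); its SMT siblings spin in
 * load_secondary() until the control thread tells them to apply the update or
 * to proceed.
 */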

/*
 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
 * in the NMI rendezvous to protect against a concurrent NMI on affected
 * CPUs.
 */
void noinstr microcode_offline_nmi_handler(void)
{
        if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
                return;
        raw_cpu_write(ucode_ctrl.nmi_enabled, false);
        raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
        raw_atomic_inc(&offline_in_nmi);
        wait_for_ctrl();
}

static noinstr bool microcode_update_handler(void)
{
        unsigned int cpu = raw_smp_processor_id();

        if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
                instrumentation_begin();
                load_primary(cpu);
                instrumentation_end();
        } else {
                load_secondary(cpu);
        }

        instrumentation_begin();
        touch_nmi_watchdog();
        instrumentation_end();

        return true;
}

/*
 * Protection against instrumentation is required for CPUs which are not
 * safe against an NMI which is delivered to the secondary SMT sibling
 * while the primary thread updates the microcode. Instrumentation can end
 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI,
 * which is the opposite of what the NMI rendezvous is trying to achieve.
 *
 * The primary thread is safe versus instrumentation as the actual
 * microcode update handles this correctly. It's only the sibling code
 * path which must be NMI safe until the primary thread completed the
 * update.
 */
bool noinstr microcode_nmi_handler(void)
{
        if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
                return false;

        raw_cpu_write(ucode_ctrl.nmi_enabled, false);
        return microcode_update_handler();
}

static int load_cpus_stopped(void *unused)
{
        if (microcode_ops->use_nmi) {
                /* Enable the NMI handler and raise NMI */
                this_cpu_write(ucode_ctrl.nmi_enabled, true);
                apic->send_IPI(smp_processor_id(), NMI_VECTOR);
        } else {
                /* Just invoke the handler directly */
                microcode_update_handler();
        }
        return 0;
}

static int load_late_stop_cpus(bool is_safe)
{
        unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
        unsigned int nr_offl, offline = 0;
        int old_rev = boot_cpu_data.microcode;
        struct cpuinfo_x86 prev_info;

        if (!is_safe) {
                pr_err("Late microcode loading without minimal revision check.\n");
                pr_err("You should switch to early loading, if possible.\n");
        }

        atomic_set(&late_cpus_in, num_online_cpus());
        atomic_set(&offline_in_nmi, 0);
        loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);

        /*
         * Take a snapshot before the microcode update in order to compare and
         * check whether any bits changed after an update.
         */
        store_cpu_caps(&prev_info);

        if (microcode_ops->use_nmi)
                static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);

        stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);

        if (microcode_ops->use_nmi)
                static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);

        /* Analyze the results */
        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
                switch (per_cpu(ucode_ctrl.result, cpu)) {
                case UCODE_UPDATED:     updated++; break;
                case UCODE_TIMEOUT:     timedout++; break;
                case UCODE_OK:          siblings++; break;
                case UCODE_OFFLINE:     offline++; break;
                default:                failed++; break;
                }
        }

        if (microcode_ops->finalize_late_load)
                microcode_ops->finalize_late_load(!updated);

        if (!updated) {
                /* Nothing changed. */
                if (!failed && !timedout)
                        return 0;

                nr_offl = cpumask_weight(&cpu_offline_mask);
                if (offline < nr_offl) {
                        pr_warn("%u offline siblings did not respond.\n",
                                nr_offl - atomic_read(&offline_in_nmi));
                }
                pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
                       failed, timedout);
                return -EIO;
        }

        if (!is_safe || failed || timedout)
                add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

        pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
        if (failed || timedout) {
                pr_err("load incomplete. %u CPUs timed out or failed\n",
                       num_online_cpus() - (updated + siblings));
        }
        pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
        microcode_check(&prev_info);

        return updated + siblings == num_online_cpus() ? 0 : -EIO;
}

/*
 * This function does two things:
 *
 * 1) Ensure that all required CPUs which are present and have been booted
 *    once are online.
 *
 *    To pass this check, all primary threads must be online.
 *
 *    If the microcode load is not safe against NMI then all SMT threads
 *    must be online as well because they still react to NMIs when they are
 *    soft-offlined and parked in one of the play_dead() variants. So if an
 *    NMI hits while the primary thread updates the microcode the resulting
 *    behaviour is undefined. The default play_dead() implementation on
 *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
 *    against a microcode update which affects MWAIT.
 *
 *    As soft-offlined CPUs still react to NMIs, the SMT sibling
 *    restriction can be lifted when the vendor driver signals to use NMI
 *    for rendezvous and the APIC provides a mechanism to send an NMI to a
 *    soft-offlined CPU. The soft-offlined CPUs are then able to
 *    participate in the rendezvous in a trivial stub handler.
 *
 * 2) Initialize the per CPU control structure and create a cpumask
 *    which contains "offline" secondary threads, so they can be handled
 *    correctly by a control CPU.
 */
static bool setup_cpus(void)
{
        struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
        bool allow_smt_offline;
        unsigned int cpu;

        allow_smt_offline = microcode_ops->nmi_safe ||
                (microcode_ops->use_nmi && apic->nmi_to_offline_cpu);

        cpumask_clear(&cpu_offline_mask);

        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
                /*
                 * Offline CPUs sit in one of the play_dead() functions
                 * with interrupts disabled, but they still react to NMIs
                 * and execute arbitrary code. Also MWAIT being updated
                 * while the offline CPU sits there is not necessarily safe
                 * on all CPU variants.
                 *
                 * Mark them in the offline_cpus mask which will be handled
                 * by CPU0 later in the update process.
                 *
                 * Ensure that the primary thread is online so that it is
                 * guaranteed that all cores are updated.
                 */
                if (!cpu_online(cpu)) {
                        if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
                                pr_err("CPU %u not online, loading aborted\n", cpu);
                                return false;
                        }
                        cpumask_set_cpu(cpu, &cpu_offline_mask);
                        per_cpu(ucode_ctrl, cpu) = ctrl;
                        continue;
                }

                /*
                 * Initialize the per CPU state. This is core scope for now,
                 * but prepared to take package or system scope into account.
                 */
                ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
                per_cpu(ucode_ctrl, cpu) = ctrl;
        }
        return true;
}

static int load_late_locked(void)
{
        if (!setup_cpus())
                return -EBUSY;

        switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
        case UCODE_NEW:
                return load_late_stop_cpus(false);
        case UCODE_NEW_SAFE:
                return load_late_stop_cpus(true);
        case UCODE_NFOUND:
                return -ENOENT;
        default:
                return -EBADFD;
        }
}

static ssize_t reload_store(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t size)
{
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul(buf, 0, &val);
        if (ret || val != 1)
                return -EINVAL;

        cpus_read_lock();
        ret = load_late_locked();
        cpus_read_unlock();

        return ret ? : size;
}

static DEVICE_ATTR_WO(reload);
#endif /* CONFIG_MICROCODE_LATE_LOADING */
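
/*
 * Late loading is triggered from user space through the "reload" attribute,
 * which appears under the CPU subsystem root. A typical (illustrative) flow
 * is to place the new blob under /lib/firmware and then write '1':
 *
 *   # echo 1 > /sys/devices/system/cpu/microcode/reload
 *
 * The exact firmware path is vendor specific and handled by the
 * request_microcode_fw() callback in the vendor drivers, not here.
 */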

static ssize_t version_show(struct device *dev,
                        struct device_attribute *attr, char *buf)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

        return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
}

static ssize_t processor_flags_show(struct device *dev,
                        struct device_attribute *attr, char *buf)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

        return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}

static DEVICE_ATTR_RO(version);
static DEVICE_ATTR_RO(processor_flags);

static struct attribute *mc_default_attrs[] = {
        &dev_attr_version.attr,
        &dev_attr_processor_flags.attr,
        NULL
};

static const struct attribute_group mc_attr_group = {
        .attrs  = mc_default_attrs,
        .name   = "microcode",
};
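
/*
 * mc_attr_group is instantiated on each CPU device in mc_cpu_online(), so the
 * current signature is visible as
 * /sys/devices/system/cpu/cpuN/microcode/{version,processor_flags}.
 */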

static void microcode_fini_cpu(int cpu)
{
        if (microcode_ops->microcode_fini_cpu)
                microcode_ops->microcode_fini_cpu(cpu);
}

/**
 * microcode_bsp_resume - Update boot CPU microcode during resume.
 */
void microcode_bsp_resume(void)
{
        int cpu = smp_processor_id();
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

        if (uci->mc)
                microcode_ops->apply_microcode(cpu);
        else
                reload_early_microcode(cpu);
}
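
/*
 * Microcode state does not survive suspend-to-RAM or hibernation, so the
 * resume path reapplies it on the boot CPU here; the secondary CPUs pick it
 * up again when they are brought back online.
 */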

static struct syscore_ops mc_syscore_ops = {
        .resume = microcode_bsp_resume,
};

static int mc_cpu_online(unsigned int cpu)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        struct device *dev = get_cpu_device(cpu);

        memset(uci, 0, sizeof(*uci));

        microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
        cpu_data(cpu).microcode = uci->cpu_sig.rev;
        if (!cpu)
                boot_cpu_data.microcode = uci->cpu_sig.rev;

        if (sysfs_create_group(&dev->kobj, &mc_attr_group))
                pr_err("Failed to create group for CPU%d\n", cpu);
        return 0;
}

static int mc_cpu_down_prep(unsigned int cpu)
{
        struct device *dev = get_cpu_device(cpu);

        microcode_fini_cpu(cpu);
        sysfs_remove_group(&dev->kobj, &mc_attr_group);
        return 0;
}

static struct attribute *cpu_root_microcode_attrs[] = {
#ifdef CONFIG_MICROCODE_LATE_LOADING
        &dev_attr_reload.attr,
#endif
        NULL
};

static const struct attribute_group cpu_root_microcode_group = {
        .name  = "microcode",
        .attrs = cpu_root_microcode_attrs,
};

static int __init microcode_init(void)
{
        struct device *dev_root;
        struct cpuinfo_x86 *c = &boot_cpu_data;
        int error;

        if (dis_ucode_ldr)
                return -EINVAL;

        if (c->x86_vendor == X86_VENDOR_INTEL)
                microcode_ops = init_intel_microcode();
        else if (c->x86_vendor == X86_VENDOR_AMD)
                microcode_ops = init_amd_microcode();
        else
                pr_err("no support for this CPU vendor\n");

        if (!microcode_ops)
                return -ENODEV;

        microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
        if (IS_ERR(microcode_pdev))
                return PTR_ERR(microcode_pdev);

        dev_root = bus_get_dev_root(&cpu_subsys);
        if (dev_root) {
                error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
                put_device(dev_root);
                if (error) {
                        pr_err("Error creating microcode group!\n");
                        goto out_pdev;
                }
        }

        register_syscore_ops(&mc_syscore_ops);
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
                          mc_cpu_online, mc_cpu_down_prep);

        return 0;

 out_pdev:
        platform_device_unregister(microcode_pdev);
        return error;
}
late_initcall(microcode_init);