From eff1bd201bff47c6ef9d892215c345e86f0eab1e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Mar 2021 12:51:19 +0100 Subject: [PATCH] 5.4-stable patches added patches: media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch --- ...de-processing-in-cio2_subdev_set_fmt.patch | 35 ++++ ...ibm-arch-vec-5-platform-support-scan.patch | 59 ++++++ ...efore-last-idle-s-need_resched-check.patch | 90 +++++++++ ...-deferred-wake-up-before-user-resume.patch | 174 ++++++++++++++++++ ...uog-wake-up-to-rcu_eqs_enter-callers.patch | 59 ++++++ queue-5.4/series | 7 + ...to-exit-vmx-root-if-vmx-is-supported.patch | 70 +++++++ ...eat-faults-on-vmxoff-in-reboot-flows.patch | 64 +++++++ 8 files changed, 558 insertions(+) create mode 100644 queue-5.4/media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch create mode 100644 queue-5.4/powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch create mode 100644 queue-5.4/rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch create mode 100644 queue-5.4/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch create mode 100644 queue-5.4/rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch create mode 100644 queue-5.4/x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch create mode 100644 queue-5.4/x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch diff --git a/queue-5.4/media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch b/queue-5.4/media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch new file mode 
100644 index 00000000000..e40e981edae --- /dev/null +++ b/queue-5.4/media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch @@ -0,0 +1,35 @@ +From 334de4b45892f7e67074e1b1b2ac36fd3e091118 Mon Sep 17 00:00:00 2001 +From: Pavel Machek +Date: Wed, 30 Dec 2020 13:55:50 +0100 +Subject: media: ipu3-cio2: Fix mbus_code processing in cio2_subdev_set_fmt() + +From: Pavel Machek + +commit 334de4b45892f7e67074e1b1b2ac36fd3e091118 upstream. + +Loop was useless as it would always exit on the first iteration. Fix +it with right condition. + +Signed-off-by: Pavel Machek (CIP) +Fixes: a86cf9b29e8b ("media: ipu3-cio2: Validate mbus format in setting subdev format") +Tested-by: Laurent Pinchart +Reviewed-by: Laurent Pinchart +Cc: stable@vger.kernel.org # v4.16 and up +Signed-off-by: Sakari Ailus +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/pci/intel/ipu3/ipu3-cio2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c ++++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c +@@ -1288,7 +1288,7 @@ static int cio2_subdev_set_fmt(struct v4 + fmt->format.code = formats[0].mbus_code; + + for (i = 0; i < ARRAY_SIZE(formats); i++) { +- if (formats[i].mbus_code == fmt->format.code) { ++ if (formats[i].mbus_code == mbus_code) { + fmt->format.code = mbus_code; + break; + } diff --git a/queue-5.4/powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch b/queue-5.4/powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch new file mode 100644 index 00000000000..5f7408084c3 --- /dev/null +++ b/queue-5.4/powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch @@ -0,0 +1,59 @@ +From ed5b00a05c2ae95b59adc3442f45944ec632e794 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 22 Jan 2021 08:50:29 +0100 +Subject: powerpc/prom: Fix "ibm,arch-vec-5-platform-support" scan +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +From: Cédric Le Goater + +commit ed5b00a05c2ae95b59adc3442f45944ec632e794 upstream. + +The "ibm,arch-vec-5-platform-support" property is a list of pairs of +bytes representing the options and values supported by the platform +firmware. At boot time, Linux scans this list and activates the +available features it recognizes: Radix and XIVE. + +A recent change modified the number of entries to loop on and 8 bytes, +4 pairs of { options, values } entries are always scanned. This is +fine on KVM but not on PowerVM which can advertise less. As a +consequence on this platform, Linux reads extra entries pointing to +random data, interprets these as available features and tries to +activate them, leading to a firmware crash in +ibm,client-architecture-support. + +Fix that by using the property length of "ibm,arch-vec-5-platform-support". + +Fixes: ab91239942a9 ("powerpc/prom: Remove VLA in prom_check_platform_support()") +Cc: stable@vger.kernel.org # v4.20+ +Signed-off-by: Cédric Le Goater +Reviewed-by: Fabiano Rosas +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210122075029.797013-1-clg@kaod.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kernel/prom_init.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +--- a/arch/powerpc/kernel/prom_init.c ++++ b/arch/powerpc/kernel/prom_init.c +@@ -1305,14 +1305,10 @@ static void __init prom_check_platform_s + if (prop_len > sizeof(vec)) + prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", + prop_len); +- prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", +- &vec, sizeof(vec)); +- for (i = 0; i < sizeof(vec); i += 2) { +- prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 +- , vec[i] +- , vec[i + 1]); +- prom_parse_platform_support(vec[i], vec[i + 1], +- &supported); ++ prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); ++ for (i = 0; i < prop_len; i += 2) { ++ prom_debug("%d: index = 0x%x val = 
0x%x\n", i / 2, vec[i], vec[i + 1]); ++ prom_parse_platform_support(vec[i], vec[i + 1], &supported); + } + } + diff --git a/queue-5.4/rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch b/queue-5.4/rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch new file mode 100644 index 00000000000..8bd48bfb0a8 --- /dev/null +++ b/queue-5.4/rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch @@ -0,0 +1,90 @@ +From 43789ef3f7d61aa7bed0cb2764e588fc990c30ef Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Mon, 1 Feb 2021 00:05:45 +0100 +Subject: rcu/nocb: Perform deferred wake up before last idle's need_resched() check + +From: Frederic Weisbecker + +commit 43789ef3f7d61aa7bed0cb2764e588fc990c30ef upstream. + +Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP +kthread (rcuog) to be serviced. + +Usually a local wake up happening while running the idle task is handled +in one of the need_resched() checks carefully placed within the idle +loop that can break to the scheduler. + +Unfortunately the call to rcu_idle_enter() is already beyond the last +generic need_resched() check and we may halt the CPU with a resched +request unhandled, leaving the task hanging. + +Fix this with splitting the rcuog wakeup handling from rcu_idle_enter() +and place it before the last generic need_resched() check in the idle +loop. It is then assumed that no call to call_rcu() will be performed +after that in the idle loop until the CPU is put in low power mode. + +Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and perf) +Reported-by: Paul E. 
McKenney +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-3-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rcupdate.h | 2 ++ + kernel/rcu/tree.c | 3 --- + kernel/rcu/tree_plugin.h | 5 +++++ + kernel/sched/idle.c | 1 + + 4 files changed, 8 insertions(+), 3 deletions(-) + +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -96,8 +96,10 @@ static inline void rcu_user_exit(void) { + + #ifdef CONFIG_RCU_NOCB_CPU + void rcu_init_nohz(void); ++void rcu_nocb_flush_deferred_wakeup(void); + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ + static inline void rcu_init_nohz(void) { } ++static inline void rcu_nocb_flush_deferred_wakeup(void) { } + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + + /** +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -599,10 +599,7 @@ static void rcu_eqs_enter(bool user) + */ + void rcu_idle_enter(void) + { +- struct rcu_data *rdp = this_cpu_ptr(&rcu_data); +- + lockdep_assert_irqs_disabled(); +- do_nocb_deferred_wakeup(rdp); + rcu_eqs_enter(false); + } + +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -2190,6 +2190,11 @@ static void do_nocb_deferred_wakeup(stru + do_nocb_deferred_wakeup_common(rdp); + } + ++void rcu_nocb_flush_deferred_wakeup(void) ++{ ++ do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data)); ++} ++ + void __init rcu_init_nohz(void) + { + int cpu; +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -249,6 +249,7 @@ static void do_idle(void) + } + + arch_cpu_idle_enter(); ++ rcu_nocb_flush_deferred_wakeup(); + + /* + * In poll mode we reenable interrupts and spin. 
Also if we diff --git a/queue-5.4/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch b/queue-5.4/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch new file mode 100644 index 00000000000..1f8666278f6 --- /dev/null +++ b/queue-5.4/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch @@ -0,0 +1,174 @@ +From f8bb5cae9616224a39cbb399de382d36ac41df10 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Mon, 1 Feb 2021 00:05:46 +0100 +Subject: rcu/nocb: Trigger self-IPI on late deferred wake up before user resume + +From: Frederic Weisbecker + +commit f8bb5cae9616224a39cbb399de382d36ac41df10 upstream. + +Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP +kthread (rcuog) to be serviced. + +Unfortunately the call to rcu_user_enter() is already past the last +rescheduling opportunity before we resume to userspace or to guest mode. +We may escape there with the woken task ignored. + +The ultimate resort to fix every callsite is to trigger a self-IPI +(nohz_full depends on arch to implement arch_irq_work_raise()) that will +trigger a reschedule on IRQ tail or guest exit. + +Eventually every site that wants a saner treatment will need to carefully +place a call to rcu_nocb_flush_deferred_wakeup() before the last explicit +need_resched() check upon resume. + +Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and perf) +Reported-by: Paul E. 
McKenney +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-4-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rcu/tree.c | 21 ++++++++++++++++++++- + kernel/rcu/tree.h | 2 +- + kernel/rcu/tree_plugin.h | 25 ++++++++++++++++--------- + 3 files changed, 37 insertions(+), 11 deletions(-) + +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -604,6 +604,18 @@ void rcu_idle_enter(void) + } + + #ifdef CONFIG_NO_HZ_FULL ++ ++/* ++ * An empty function that will trigger a reschedule on ++ * IRQ tail once IRQs get re-enabled on userspace resume. ++ */ ++static void late_wakeup_func(struct irq_work *work) ++{ ++} ++ ++static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = ++ IRQ_WORK_INIT(late_wakeup_func); ++ + /** + * rcu_user_enter - inform RCU that we are resuming userspace. + * +@@ -621,12 +633,19 @@ void rcu_user_enter(void) + + lockdep_assert_irqs_disabled(); + ++ /* ++ * We may be past the last rescheduling opportunity in the entry code. ++ * Trigger a self IPI that will fire and reschedule once we resume to ++ * user/guest mode. 
++ */ + instrumentation_begin(); +- do_nocb_deferred_wakeup(rdp); ++ if (do_nocb_deferred_wakeup(rdp) && need_resched()) ++ irq_work_queue(this_cpu_ptr(&late_wakeup_work)); + instrumentation_end(); + + rcu_eqs_enter(true); + } ++ + #endif /* CONFIG_NO_HZ_FULL */ + + /* +--- a/kernel/rcu/tree.h ++++ b/kernel/rcu/tree.h +@@ -438,7 +438,7 @@ static bool rcu_nocb_try_bypass(struct r + static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, + unsigned long flags); + static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp); ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); + static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); + static void rcu_spawn_cpu_nocb_kthread(int cpu); + static void __init rcu_spawn_nocb_kthreads(void); +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -1639,8 +1639,8 @@ bool rcu_is_nocb_cpu(int cpu) + * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock + * and this function releases it. + */ +-static void wake_nocb_gp(struct rcu_data *rdp, bool force, +- unsigned long flags) ++static bool wake_nocb_gp(struct rcu_data *rdp, bool force, ++ unsigned long flags) + __releases(rdp->nocb_lock) + { + bool needwake = false; +@@ -1651,7 +1651,7 @@ static void wake_nocb_gp(struct rcu_data + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, + TPS("AlreadyAwake")); + rcu_nocb_unlock_irqrestore(rdp, flags); +- return; ++ return false; + } + del_timer(&rdp->nocb_timer); + rcu_nocb_unlock_irqrestore(rdp, flags); +@@ -1664,6 +1664,8 @@ static void wake_nocb_gp(struct rcu_data + raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); + if (needwake) + wake_up_process(rdp_gp->nocb_gp_kthread); ++ ++ return needwake; + } + + /* +@@ -2155,20 +2157,23 @@ static int rcu_nocb_need_deferred_wakeup + } + + /* Do a deferred wakeup of rcu_nocb_kthread(). 
*/ +-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp) + { + unsigned long flags; + int ndw; ++ int ret; + + rcu_nocb_lock_irqsave(rdp, flags); + if (!rcu_nocb_need_deferred_wakeup(rdp)) { + rcu_nocb_unlock_irqrestore(rdp, flags); +- return; ++ return false; + } + ndw = READ_ONCE(rdp->nocb_defer_wakeup); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); +- wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); ++ ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake")); ++ ++ return ret; + } + + /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ +@@ -2184,10 +2189,11 @@ static void do_nocb_deferred_wakeup_time + * This means we do an inexact common-case check. Note that if + * we miss, ->nocb_timer will eventually clean things up. + */ +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) + { + if (rcu_nocb_need_deferred_wakeup(rdp)) +- do_nocb_deferred_wakeup_common(rdp); ++ return do_nocb_deferred_wakeup_common(rdp); ++ return false; + } + + void rcu_nocb_flush_deferred_wakeup(void) +@@ -2527,8 +2533,9 @@ static int rcu_nocb_need_deferred_wakeup + return false; + } + +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) + { ++ return false; + } + + static void rcu_spawn_cpu_nocb_kthread(int cpu) diff --git a/queue-5.4/rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch b/queue-5.4/rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch new file mode 100644 index 00000000000..39b2d147e31 --- /dev/null +++ b/queue-5.4/rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch @@ -0,0 +1,59 @@ +From 54b7429efffc99e845ba9381bee3244f012a06c2 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Mon, 1 Feb 2021 00:05:44 +0100 +Subject: 
rcu: Pull deferred rcuog wake up to rcu_eqs_enter() callers + +From: Frederic Weisbecker + +commit 54b7429efffc99e845ba9381bee3244f012a06c2 upstream. + +Deferred wakeup of rcuog kthreads upon RCU idle mode entry is going to +be handled differently whether initiated by idle, user or guest. Prepare +with pulling that control up to rcu_eqs_enter() callers. + +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-2-frederic@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rcu/tree.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -579,7 +579,6 @@ static void rcu_eqs_enter(bool user) + trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); + rdp = this_cpu_ptr(&rcu_data); +- do_nocb_deferred_wakeup(rdp); + rcu_prepare_for_idle(); + rcu_preempt_deferred_qs(current); + WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. 
*/ +@@ -600,7 +599,10 @@ static void rcu_eqs_enter(bool user) + */ + void rcu_idle_enter(void) + { ++ struct rcu_data *rdp = this_cpu_ptr(&rcu_data); ++ + lockdep_assert_irqs_disabled(); ++ do_nocb_deferred_wakeup(rdp); + rcu_eqs_enter(false); + } + +@@ -618,7 +620,14 @@ void rcu_idle_enter(void) + */ + void rcu_user_enter(void) + { ++ struct rcu_data *rdp = this_cpu_ptr(&rcu_data); ++ + lockdep_assert_irqs_disabled(); ++ ++ instrumentation_begin(); ++ do_nocb_deferred_wakeup(rdp); ++ instrumentation_end(); ++ + rcu_eqs_enter(true); + } + #endif /* CONFIG_NO_HZ_FULL */ diff --git a/queue-5.4/series b/queue-5.4/series index c0c8085f8bf..c6f14b04227 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -280,3 +280,10 @@ dts64-mt7622-fix-slow-sd-card-access.patch staging-mt7621-dma-mtk-hsdma.c-hsdma-mt7621.c.patch staging-gdm724x-fix-dma-from-stack.patch staging-rtl8188eu-add-edimax-ew-7811un-v2-to-device-table.patch +media-ipu3-cio2-fix-mbus_code-processing-in-cio2_subdev_set_fmt.patch +x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch +x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch +powerpc-prom-fix-ibm-arch-vec-5-platform-support-scan.patch +rcu-pull-deferred-rcuog-wake-up-to-rcu_eqs_enter-callers.patch +rcu-nocb-perform-deferred-wake-up-before-last-idle-s-need_resched-check.patch +rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-before-user-resume.patch diff --git a/queue-5.4/x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch b/queue-5.4/x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch new file mode 100644 index 00000000000..d8f308b513f --- /dev/null +++ b/queue-5.4/x86-reboot-force-all-cpus-to-exit-vmx-root-if-vmx-is-supported.patch @@ -0,0 +1,70 @@ +From ed72736183c45a413a8d6974dd04be90f514cb6b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 30 Dec 2020 16:26:55 -0800 +Subject: x86/reboot: Force all cpus to exit VMX root if VMX is supported + +From: Sean Christopherson + +commit 
ed72736183c45a413a8d6974dd04be90f514cb6b upstream. + +Force all CPUs to do VMXOFF (via NMI shootdown) during an emergency +reboot if VMX is _supported_, as VMX being off on the current CPU does +not prevent other CPUs from being in VMX root (post-VMXON). This fixes +a bug where a crash/panic reboot could leave other CPUs in VMX root and +prevent them from being woken via INIT-SIPI-SIPI in the new kernel. + +Fixes: d176720d34c7 ("x86: disable VMX on all CPUs on reboot") +Cc: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Signed-off-by: David P. Reed +[sean: reworked changelog and further tweaked comment] +Signed-off-by: Sean Christopherson +Message-Id: <20201231002702.2223707-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/reboot.c | 29 ++++++++++------------------- + 1 file changed, 10 insertions(+), 19 deletions(-) + +--- a/arch/x86/kernel/reboot.c ++++ b/arch/x86/kernel/reboot.c +@@ -538,29 +538,20 @@ static void emergency_vmx_disable_all(vo + local_irq_disable(); + + /* +- * We need to disable VMX on all CPUs before rebooting, otherwise +- * we risk hanging up the machine, because the CPU ignore INIT +- * signals when VMX is enabled. ++ * Disable VMX on all CPUs before rebooting, otherwise we risk hanging ++ * the machine, because the CPU blocks INIT when it's in VMX root. + * +- * We can't take any locks and we may be on an inconsistent +- * state, so we use NMIs as IPIs to tell the other CPUs to disable +- * VMX and halt. ++ * We can't take any locks and we may be on an inconsistent state, so ++ * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt. + * +- * For safety, we will avoid running the nmi_shootdown_cpus() +- * stuff unnecessarily, but we don't have a way to check +- * if other CPUs have VMX enabled. So we will call it only if the +- * CPU we are running on has VMX enabled. +- * +- * We will miss cases where VMX is not enabled on all CPUs. 
This +- * shouldn't do much harm because KVM always enable VMX on all +- * CPUs anyway. But we can miss it on the small window where KVM +- * is still enabling VMX. ++ * Do the NMI shootdown even if VMX if off on _this_ CPU, as that ++ * doesn't prevent a different CPU from being in VMX root operation. + */ +- if (cpu_has_vmx() && cpu_vmx_enabled()) { +- /* Disable VMX on this CPU. */ +- cpu_vmxoff(); ++ if (cpu_has_vmx()) { ++ /* Safely force _this_ CPU out of VMX root operation. */ ++ __cpu_emergency_vmxoff(); + +- /* Halt and disable VMX on the other CPUs */ ++ /* Halt and exit VMX root operation on the other CPUs. */ + nmi_shootdown_cpus(vmxoff_nmi); + + } diff --git a/queue-5.4/x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch b/queue-5.4/x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch new file mode 100644 index 00000000000..3a02046b94f --- /dev/null +++ b/queue-5.4/x86-virt-eat-faults-on-vmxoff-in-reboot-flows.patch @@ -0,0 +1,64 @@ +From aec511ad153556640fb1de38bfe00c69464f997f Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 30 Dec 2020 16:26:54 -0800 +Subject: x86/virt: Eat faults on VMXOFF in reboot flows + +From: Sean Christopherson + +commit aec511ad153556640fb1de38bfe00c69464f997f upstream. + +Silently ignore all faults on VMXOFF in the reboot flows as such faults +are all but guaranteed to be due to the CPU not being in VMX root. +Because (a) VMXOFF may be executed in NMI context, e.g. after VMXOFF but +before CR4.VMXE is cleared, (b) there's no way to query the CPU's VMX +state without faulting, and (c) the whole point is to get out of VMX +root, eating faults is the simplest way to achieve the desired behavior. + +Technically, VMXOFF can fault (or fail) for other reasons, but all other +fault and failure scenarios are mode related, i.e. the kernel would have +to magically end up in RM, V86, compat mode, at CPL>0, or running with +the SMI Transfer Monitor active. 
The kernel is beyond hosed if any of +those scenarios are encountered; trying to do something fancy in the +error path to handle them cleanly is pointless. + +Fixes: 1e9931146c74 ("x86: asm/virtext.h: add cpu_vmxoff() inline function") +Reported-by: David P. Reed +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20201231002702.2223707-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/virtext.h | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/virtext.h ++++ b/arch/x86/include/asm/virtext.h +@@ -30,15 +30,22 @@ static inline int cpu_has_vmx(void) + } + + +-/** Disable VMX on the current CPU ++/** ++ * cpu_vmxoff() - Disable VMX on the current CPU + * +- * vmxoff causes a undefined-opcode exception if vmxon was not run +- * on the CPU previously. Only call this function if you know VMX +- * is enabled. ++ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) ++ * ++ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to ++ * atomically track post-VMXON state, e.g. this may be called in NMI context. ++ * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. ++ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is ++ * magically in RM, VM86, compat mode, or at CPL>0. + */ + static inline void cpu_vmxoff(void) + { +- asm volatile ("vmxoff"); ++ asm_volatile_goto("1: vmxoff\n\t" ++ _ASM_EXTABLE(1b, %l[fault]) :::: fault); ++fault: + cr4_clear_bits(X86_CR4_VMXE); + } + -- 2.47.3