From 575f417e39f4ffd3a37475d55d01d54208847447 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 4 Mar 2021 16:21:59 -0500 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- ...-flush-pending-rcuog-wakeup-before-l.patch | 79 ++++++++ ...itly-flush-pending-rcuog-wakeup-befo.patch | 159 +++++++++++++++ ...-self-ipi-on-late-deferred-wake-up-b.patch | 184 ++++++++++++++++++ queue-5.10/series | 3 + 4 files changed, 425 insertions(+) create mode 100644 queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch create mode 100644 queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch create mode 100644 queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch diff --git a/queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch b/queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch new file mode 100644 index 00000000000..a55efdbcc99 --- /dev/null +++ b/queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch @@ -0,0 +1,79 @@ +From 76c80c8df0818f1c919718d47d95904147200023 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 Feb 2021 00:05:47 +0100 +Subject: entry: Explicitly flush pending rcuog wakeup before last rescheduling + point + +From: Frederic Weisbecker + +[ Upstream commit 47b8ff194c1fd73d58dc339b597d466fe48c8958 ] + +Following the idle loop model, cleanly check for pending rcuog wakeup +before the last rescheduling point on resuming to user mode. This +way we can avoid to do it from rcu_user_enter() with the last resort +self-IPI hack that enforces rescheduling. + +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-5-frederic@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/entry/common.c | 7 +++++++ + kernel/rcu/tree.c | 12 +++++++----- + 2 files changed, 14 insertions(+), 5 deletions(-) + +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index e9e2df3f3f9e..fcc7e93401d5 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -174,6 +174,10 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + * enabled above. + */ + local_irq_disable_exit_to_user(); ++ ++ /* Check if any of the above work has queued a deferred wakeup */ ++ rcu_nocb_flush_deferred_wakeup(); ++ + ti_work = READ_ONCE(current_thread_info()->flags); + } + +@@ -187,6 +191,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) + + lockdep_assert_irqs_disabled(); + ++ /* Flush pending rcuog wakeup before the last need_resched() check */ ++ rcu_nocb_flush_deferred_wakeup(); ++ + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index f137a599941b..0d8a2e2df221 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -699,13 +699,15 @@ noinstr void rcu_user_enter(void) + lockdep_assert_irqs_disabled(); + + /* +- * We may be past the last rescheduling opportunity in the entry code. +- * Trigger a self IPI that will fire and reschedule once we resume to +- * user/guest mode. ++ * Other than generic entry implementation, we may be past the last ++ * rescheduling opportunity in the entry code. Trigger a self IPI ++ * that will fire and reschedule once we resume in user/guest mode. + */ + instrumentation_begin(); +- if (do_nocb_deferred_wakeup(rdp) && need_resched()) +- irq_work_queue(this_cpu_ptr(&late_wakeup_work)); ++ if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) { ++ if (do_nocb_deferred_wakeup(rdp) && need_resched()) ++ irq_work_queue(this_cpu_ptr(&late_wakeup_work)); ++ } + instrumentation_end(); + + rcu_eqs_enter(true); +-- +2.30.1 + diff --git a/queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch b/queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch new file mode 100644 index 00000000000..e0ff8975383 --- /dev/null +++ b/queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch @@ -0,0 +1,159 @@ +From eeee2c3d420025d0ca1b0eb1aba6d6d912124d9d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 Feb 2021 00:05:48 +0100 +Subject: entry/kvm: Explicitly flush pending rcuog wakeup before last + rescheduling point + +From: Frederic Weisbecker + +[ Upstream commit 4ae7dc97f726ea95c58ac58af71cc034ad22d7de ] + +Following the idle loop model, cleanly check for pending rcuog wakeup +before the last rescheduling point upon resuming to guest mode. This +way we can avoid to do it from rcu_user_enter() with the last resort +self-IPI hack that enforces rescheduling. + +Suggested-by: Peter Zijlstra +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-6-frederic@kernel.org +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 1 + + include/linux/entry-kvm.h | 14 +++++++++++++ + kernel/rcu/tree.c | 44 ++++++++++++++++++++++++++++++--------- + kernel/rcu/tree_plugin.h | 1 + + 4 files changed, 50 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index fa5f059c2b94..08bb14e3bd61 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1776,6 +1776,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); + + bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) + { ++ xfer_to_guest_mode_prepare(); + return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || + xfer_to_guest_mode_work_pending(); + } +diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h +index 0cef17afb41a..d60ab08f9058 100644 +--- a/include/linux/entry-kvm.h ++++ b/include/linux/entry-kvm.h +@@ -46,6 +46,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, + */ + int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); + ++/** ++ * xfer_to_guest_mode_prepare - Perform last minute preparation work that ++ * need to be handled while IRQs are disabled ++ * upon entering to guest. ++ * ++ * Has to be invoked with interrupts disabled before the last call ++ * to xfer_to_guest_mode_work_pending(). ++ */ ++static inline void xfer_to_guest_mode_prepare(void) ++{ ++ lockdep_assert_irqs_disabled(); ++ rcu_nocb_flush_deferred_wakeup(); ++} ++ + /** + * __xfer_to_guest_mode_work_pending - Check if work is pending + * +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 0d8a2e2df221..eff2f7359a4c 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -670,9 +670,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); + + #ifdef CONFIG_NO_HZ_FULL + ++#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK) + /* + * An empty function that will trigger a reschedule on +- * IRQ tail once IRQs get re-enabled on userspace resume. ++ * IRQ tail once IRQs get re-enabled on userspace/guest resume. + */ + static void late_wakeup_func(struct irq_work *work) + { +@@ -681,6 +682,37 @@ static void late_wakeup_func(struct irq_work *work) + static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = + IRQ_WORK_INIT(late_wakeup_func); + ++/* ++ * If either: ++ * ++ * 1) the task is about to enter in guest mode and $ARCH doesn't support KVM generic work ++ * 2) the task is about to enter in user mode and $ARCH doesn't support generic entry. ++ * ++ * In these cases the late RCU wake ups aren't supported in the resched loops and our ++ * last resort is to fire a local irq_work that will trigger a reschedule once IRQs ++ * get re-enabled again. ++ */ ++noinstr static void rcu_irq_work_resched(void) ++{ ++ struct rcu_data *rdp = this_cpu_ptr(&rcu_data); ++ ++ if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) ++ return; ++ ++ if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) ++ return; ++ ++ instrumentation_begin(); ++ if (do_nocb_deferred_wakeup(rdp) && need_resched()) { ++ irq_work_queue(this_cpu_ptr(&late_wakeup_work)); ++ } ++ instrumentation_end(); ++} ++ ++#else ++static inline void rcu_irq_work_resched(void) { } ++#endif ++ + /** + * rcu_user_enter - inform RCU that we are resuming userspace. + * +@@ -694,8 +726,6 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = + */ + noinstr void rcu_user_enter(void) + { +- struct rcu_data *rdp = this_cpu_ptr(&rcu_data); +- + lockdep_assert_irqs_disabled(); + + /* +@@ -703,13 +733,7 @@ noinstr void rcu_user_enter(void) + * rescheduling opportunity in the entry code. Trigger a self IPI + * that will fire and reschedule once we resume in user/guest mode. + */ +- instrumentation_begin(); +- if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) { +- if (do_nocb_deferred_wakeup(rdp) && need_resched()) +- irq_work_queue(this_cpu_ptr(&late_wakeup_work)); +- } +- instrumentation_end(); +- ++ rcu_irq_work_resched(); + rcu_eqs_enter(true); + } + +diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h +index 29a00d9ea286..a9351906e290 100644 +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -2197,6 +2197,7 @@ void rcu_nocb_flush_deferred_wakeup(void) + { + do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data)); + } ++EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup); + + void __init rcu_init_nohz(void) + { +-- +2.30.1 + diff --git a/queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch b/queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch new file mode 100644 index 00000000000..a7db8c8089f --- /dev/null +++ b/queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch @@ -0,0 +1,184 @@ +From f5f7d56b45a5d6783a834b3e47864bc5eab7b574 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 Feb 2021 00:05:46 +0100 +Subject: rcu/nocb: Trigger self-IPI on late deferred wake up before user + resume + +From: Frederic Weisbecker + +[ Upstream commit f8bb5cae9616224a39cbb399de382d36ac41df10 ] + +Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP +kthread (rcuog) to be serviced. + +Unfortunately the call to rcu_user_enter() is already past the last +rescheduling opportunity before we resume to userspace or to guest mode. +We may escape there with the woken task ignored. + +The ultimate resort to fix every callsites is to trigger a self-IPI +(nohz_full depends on arch to implement arch_irq_work_raise()) that will +trigger a reschedule on IRQ tail or guest exit. + +Eventually every site that want a saner treatment will need to carefully +place a call to rcu_nocb_flush_deferred_wakeup() before the last explicit +need_resched() check upon resume. + +Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and perf) +Reported-by: Paul E. McKenney +Signed-off-by: Frederic Weisbecker +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210131230548.32970-4-frederic@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/rcu/tree.c | 21 ++++++++++++++++++++- + kernel/rcu/tree.h | 2 +- + kernel/rcu/tree_plugin.h | 25 ++++++++++++++++--------- + 3 files changed, 37 insertions(+), 11 deletions(-) + +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 5dc36c6e80fd..f137a599941b 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -669,6 +669,18 @@ void rcu_idle_enter(void) + EXPORT_SYMBOL_GPL(rcu_idle_enter); + + #ifdef CONFIG_NO_HZ_FULL ++ ++/* ++ * An empty function that will trigger a reschedule on ++ * IRQ tail once IRQs get re-enabled on userspace resume. ++ */ ++static void late_wakeup_func(struct irq_work *work) ++{ ++} ++ ++static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = ++ IRQ_WORK_INIT(late_wakeup_func); ++ + /** + * rcu_user_enter - inform RCU that we are resuming userspace. + * +@@ -686,12 +698,19 @@ noinstr void rcu_user_enter(void) + + lockdep_assert_irqs_disabled(); + ++ /* ++ * We may be past the last rescheduling opportunity in the entry code. ++ * Trigger a self IPI that will fire and reschedule once we resume to ++ * user/guest mode. ++ */ + instrumentation_begin(); +- do_nocb_deferred_wakeup(rdp); ++ if (do_nocb_deferred_wakeup(rdp) && need_resched()) ++ irq_work_queue(this_cpu_ptr(&late_wakeup_work)); + instrumentation_end(); + + rcu_eqs_enter(true); + } ++ + #endif /* CONFIG_NO_HZ_FULL */ + + /** +diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h +index e4f66b8f7c47..0ec2b1f66b13 100644 +--- a/kernel/rcu/tree.h ++++ b/kernel/rcu/tree.h +@@ -431,7 +431,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, + static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, + unsigned long flags); + static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp); ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); + static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); + static void rcu_spawn_cpu_nocb_kthread(int cpu); + static void __init rcu_spawn_nocb_kthreads(void); +diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h +index 7d4f78bf4057..29a00d9ea286 100644 +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -1631,8 +1631,8 @@ bool rcu_is_nocb_cpu(int cpu) + * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock + * and this function releases it. + */ +-static void wake_nocb_gp(struct rcu_data *rdp, bool force, +- unsigned long flags) ++static bool wake_nocb_gp(struct rcu_data *rdp, bool force, ++ unsigned long flags) + __releases(rdp->nocb_lock) + { + bool needwake = false; +@@ -1643,7 +1643,7 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, + TPS("AlreadyAwake")); + rcu_nocb_unlock_irqrestore(rdp, flags); +- return; ++ return false; + } + del_timer(&rdp->nocb_timer); + rcu_nocb_unlock_irqrestore(rdp, flags); +@@ -1656,6 +1656,8 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, + raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); + if (needwake) + wake_up_process(rdp_gp->nocb_gp_kthread); ++ ++ return needwake; + } + + /* +@@ -2152,20 +2154,23 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) + } + + /* Do a deferred wakeup of rcu_nocb_kthread(). */ +-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp) + { + unsigned long flags; + int ndw; ++ int ret; + + rcu_nocb_lock_irqsave(rdp, flags); + if (!rcu_nocb_need_deferred_wakeup(rdp)) { + rcu_nocb_unlock_irqrestore(rdp, flags); +- return; ++ return false; + } + ndw = READ_ONCE(rdp->nocb_defer_wakeup); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); +- wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); ++ ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake")); ++ ++ return ret; + } + + /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ +@@ -2181,10 +2186,11 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t) + * This means we do an inexact common-case check. Note that if + * we miss, ->nocb_timer will eventually clean things up. + */ +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) + { + if (rcu_nocb_need_deferred_wakeup(rdp)) +- do_nocb_deferred_wakeup_common(rdp); ++ return do_nocb_deferred_wakeup_common(rdp); ++ return false; + } + + void rcu_nocb_flush_deferred_wakeup(void) +@@ -2523,8 +2529,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) + return false; + } + +-static void do_nocb_deferred_wakeup(struct rcu_data *rdp) ++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) + { ++ return false; + } + + static void rcu_spawn_cpu_nocb_kthread(int cpu) +-- +2.30.1 + diff --git a/queue-5.10/series b/queue-5.10/series index 63b095b0f44..0aaab991384 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -35,3 +35,6 @@ asoc-qcom-remove-useless-debug-print.patch soundwire-debugfs-use-controller-id-instead-of-link_id.patch rsi-fix-tx-eapol-packet-handling-against-iwlwifi-ap.patch rsi-move-card-interrupt-handling-to-rx-thread.patch +rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch +entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch +entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch -- 2.47.3