git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Thu, 4 Mar 2021 21:21:59 +0000 (16:21 -0500)
committerSasha Levin <sashal@kernel.org>
Thu, 4 Mar 2021 21:21:59 +0000 (16:21 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch [new file with mode: 0644]
queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch [new file with mode: 0644]
queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch b/queue-5.10/entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch
new file mode 100644 (file)
index 0000000..a55efdb
--- /dev/null
@@ -0,0 +1,79 @@
+From 76c80c8df0818f1c919718d47d95904147200023 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 00:05:47 +0100
+Subject: entry: Explicitly flush pending rcuog wakeup before last rescheduling
+ point
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit 47b8ff194c1fd73d58dc339b597d466fe48c8958 ]
+
+Following the idle loop model, cleanly check for pending rcuog wakeup
+before the last rescheduling point on resuming to user mode. This
+way we can avoid to do it from rcu_user_enter() with the last resort
+self-IPI hack that enforces rescheduling.
+
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210131230548.32970-5-frederic@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/entry/common.c |  7 +++++++
+ kernel/rcu/tree.c     | 12 +++++++-----
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index e9e2df3f3f9e..fcc7e93401d5 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -174,6 +174,10 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+                * enabled above.
+                */
+               local_irq_disable_exit_to_user();
++
++              /* Check if any of the above work has queued a deferred wakeup */
++              rcu_nocb_flush_deferred_wakeup();
++
+               ti_work = READ_ONCE(current_thread_info()->flags);
+       }
+@@ -187,6 +191,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
+       lockdep_assert_irqs_disabled();
++      /* Flush pending rcuog wakeup before the last need_resched() check */
++      rcu_nocb_flush_deferred_wakeup();
++
+       if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
+               ti_work = exit_to_user_mode_loop(regs, ti_work);
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index f137a599941b..0d8a2e2df221 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -699,13 +699,15 @@ noinstr void rcu_user_enter(void)
+       lockdep_assert_irqs_disabled();
+       /*
+-       * We may be past the last rescheduling opportunity in the entry code.
+-       * Trigger a self IPI that will fire and reschedule once we resume to
+-       * user/guest mode.
++       * Other than generic entry implementation, we may be past the last
++       * rescheduling opportunity in the entry code. Trigger a self IPI
++       * that will fire and reschedule once we resume in user/guest mode.
+        */
+       instrumentation_begin();
+-      if (do_nocb_deferred_wakeup(rdp) && need_resched())
+-              irq_work_queue(this_cpu_ptr(&late_wakeup_work));
++      if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
++              if (do_nocb_deferred_wakeup(rdp) && need_resched())
++                      irq_work_queue(this_cpu_ptr(&late_wakeup_work));
++      }
+       instrumentation_end();
+       rcu_eqs_enter(true);
+-- 
+2.30.1
+
diff --git a/queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch b/queue-5.10/entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch
new file mode 100644 (file)
index 0000000..e0ff897
--- /dev/null
@@ -0,0 +1,159 @@
+From eeee2c3d420025d0ca1b0eb1aba6d6d912124d9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 00:05:48 +0100
+Subject: entry/kvm: Explicitly flush pending rcuog wakeup before last
+ rescheduling point
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit 4ae7dc97f726ea95c58ac58af71cc034ad22d7de ]
+
+Following the idle loop model, cleanly check for pending rcuog wakeup
+before the last rescheduling point upon resuming to guest mode. This
+way we can avoid to do it from rcu_user_enter() with the last resort
+self-IPI hack that enforces rescheduling.
+
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210131230548.32970-6-frederic@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c        |  1 +
+ include/linux/entry-kvm.h | 14 +++++++++++++
+ kernel/rcu/tree.c         | 44 ++++++++++++++++++++++++++++++---------
+ kernel/rcu/tree_plugin.h  |  1 +
+ 4 files changed, 50 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index fa5f059c2b94..08bb14e3bd61 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1776,6 +1776,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+ bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
+ {
++      xfer_to_guest_mode_prepare();
+       return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
+               xfer_to_guest_mode_work_pending();
+ }
+diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
+index 0cef17afb41a..d60ab08f9058 100644
+--- a/include/linux/entry-kvm.h
++++ b/include/linux/entry-kvm.h
+@@ -46,6 +46,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu,
+  */
+ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
++/**
++ * xfer_to_guest_mode_prepare - Perform last minute preparation work that
++ *                            need to be handled while IRQs are disabled
++ *                            upon entering to guest.
++ *
++ * Has to be invoked with interrupts disabled before the last call
++ * to xfer_to_guest_mode_work_pending().
++ */
++static inline void xfer_to_guest_mode_prepare(void)
++{
++      lockdep_assert_irqs_disabled();
++      rcu_nocb_flush_deferred_wakeup();
++}
++
+ /**
+  * __xfer_to_guest_mode_work_pending - Check if work is pending
+  *
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 0d8a2e2df221..eff2f7359a4c 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -670,9 +670,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
+ #ifdef CONFIG_NO_HZ_FULL
++#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)
+ /*
+  * An empty function that will trigger a reschedule on
+- * IRQ tail once IRQs get re-enabled on userspace resume.
++ * IRQ tail once IRQs get re-enabled on userspace/guest resume.
+  */
+ static void late_wakeup_func(struct irq_work *work)
+ {
+@@ -681,6 +682,37 @@ static void late_wakeup_func(struct irq_work *work)
+ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
+       IRQ_WORK_INIT(late_wakeup_func);
++/*
++ * If either:
++ *
++ * 1) the task is about to enter in guest mode and $ARCH doesn't support KVM generic work
++ * 2) the task is about to enter in user mode and $ARCH doesn't support generic entry.
++ *
++ * In these cases the late RCU wake ups aren't supported in the resched loops and our
++ * last resort is to fire a local irq_work that will trigger a reschedule once IRQs
++ * get re-enabled again.
++ */
++noinstr static void rcu_irq_work_resched(void)
++{
++      struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
++
++      if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
++              return;
++
++      if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
++              return;
++
++      instrumentation_begin();
++      if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
++              irq_work_queue(this_cpu_ptr(&late_wakeup_work));
++      }
++      instrumentation_end();
++}
++
++#else
++static inline void rcu_irq_work_resched(void) { }
++#endif
++
+ /**
+  * rcu_user_enter - inform RCU that we are resuming userspace.
+  *
+@@ -694,8 +726,6 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
+  */
+ noinstr void rcu_user_enter(void)
+ {
+-      struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+-
+       lockdep_assert_irqs_disabled();
+       /*
+@@ -703,13 +733,7 @@ noinstr void rcu_user_enter(void)
+        * rescheduling opportunity in the entry code. Trigger a self IPI
+        * that will fire and reschedule once we resume in user/guest mode.
+        */
+-      instrumentation_begin();
+-      if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
+-              if (do_nocb_deferred_wakeup(rdp) && need_resched())
+-                      irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+-      }
+-      instrumentation_end();
+-
++      rcu_irq_work_resched();
+       rcu_eqs_enter(true);
+ }
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index 29a00d9ea286..a9351906e290 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -2197,6 +2197,7 @@ void rcu_nocb_flush_deferred_wakeup(void)
+ {
+       do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
+ }
++EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
+ void __init rcu_init_nohz(void)
+ {
+-- 
+2.30.1
+
diff --git a/queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch b/queue-5.10/rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch
new file mode 100644 (file)
index 0000000..a7db8c8
--- /dev/null
@@ -0,0 +1,184 @@
+From f5f7d56b45a5d6783a834b3e47864bc5eab7b574 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 00:05:46 +0100
+Subject: rcu/nocb: Trigger self-IPI on late deferred wake up before user
+ resume
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit f8bb5cae9616224a39cbb399de382d36ac41df10 ]
+
+Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
+kthread (rcuog) to be serviced.
+
+Unfortunately the call to rcu_user_enter() is already past the last
+rescheduling opportunity before we resume to userspace or to guest mode.
+We may escape there with the woken task ignored.
+
+The ultimate resort to fix every callsites is to trigger a self-IPI
+(nohz_full depends on arch to implement arch_irq_work_raise()) that will
+trigger a reschedule on IRQ tail or guest exit.
+
+Eventually every site that want a saner treatment will need to carefully
+place a call to rcu_nocb_flush_deferred_wakeup() before the last explicit
+need_resched() check upon resume.
+
+Fixes: 96d3fd0d315a ("rcu: Break call_rcu() deadlock involving scheduler and perf")
+Reported-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210131230548.32970-4-frederic@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree.c        | 21 ++++++++++++++++++++-
+ kernel/rcu/tree.h        |  2 +-
+ kernel/rcu/tree_plugin.h | 25 ++++++++++++++++---------
+ 3 files changed, 37 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 5dc36c6e80fd..f137a599941b 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -669,6 +669,18 @@ void rcu_idle_enter(void)
+ EXPORT_SYMBOL_GPL(rcu_idle_enter);
+ #ifdef CONFIG_NO_HZ_FULL
++
++/*
++ * An empty function that will trigger a reschedule on
++ * IRQ tail once IRQs get re-enabled on userspace resume.
++ */
++static void late_wakeup_func(struct irq_work *work)
++{
++}
++
++static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
++      IRQ_WORK_INIT(late_wakeup_func);
++
+ /**
+  * rcu_user_enter - inform RCU that we are resuming userspace.
+  *
+@@ -686,12 +698,19 @@ noinstr void rcu_user_enter(void)
+       lockdep_assert_irqs_disabled();
++      /*
++       * We may be past the last rescheduling opportunity in the entry code.
++       * Trigger a self IPI that will fire and reschedule once we resume to
++       * user/guest mode.
++       */
+       instrumentation_begin();
+-      do_nocb_deferred_wakeup(rdp);
++      if (do_nocb_deferred_wakeup(rdp) && need_resched())
++              irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+       instrumentation_end();
+       rcu_eqs_enter(true);
+ }
++
+ #endif /* CONFIG_NO_HZ_FULL */
+ /**
+diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
+index e4f66b8f7c47..0ec2b1f66b13 100644
+--- a/kernel/rcu/tree.h
++++ b/kernel/rcu/tree.h
+@@ -431,7 +431,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+                                unsigned long flags);
+ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
+ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
+ static void rcu_spawn_cpu_nocb_kthread(int cpu);
+ static void __init rcu_spawn_nocb_kthreads(void);
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index 7d4f78bf4057..29a00d9ea286 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -1631,8 +1631,8 @@ bool rcu_is_nocb_cpu(int cpu)
+  * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock
+  * and this function releases it.
+  */
+-static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+-                         unsigned long flags)
++static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
++                       unsigned long flags)
+       __releases(rdp->nocb_lock)
+ {
+       bool needwake = false;
+@@ -1643,7 +1643,7 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+               trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+                                   TPS("AlreadyAwake"));
+               rcu_nocb_unlock_irqrestore(rdp, flags);
+-              return;
++              return false;
+       }
+       del_timer(&rdp->nocb_timer);
+       rcu_nocb_unlock_irqrestore(rdp, flags);
+@@ -1656,6 +1656,8 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+       raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+       if (needwake)
+               wake_up_process(rdp_gp->nocb_gp_kthread);
++
++      return needwake;
+ }
+ /*
+@@ -2152,20 +2154,23 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+ }
+ /* Do a deferred wakeup of rcu_nocb_kthread(). */
+-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
+ {
+       unsigned long flags;
+       int ndw;
++      int ret;
+       rcu_nocb_lock_irqsave(rdp, flags);
+       if (!rcu_nocb_need_deferred_wakeup(rdp)) {
+               rcu_nocb_unlock_irqrestore(rdp, flags);
+-              return;
++              return false;
+       }
+       ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+       WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+-      wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
++      ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+       trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
++
++      return ret;
+ }
+ /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
+@@ -2181,10 +2186,11 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
+  * This means we do an inexact common-case check.  Note that if
+  * we miss, ->nocb_timer will eventually clean things up.
+  */
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+ {
+       if (rcu_nocb_need_deferred_wakeup(rdp))
+-              do_nocb_deferred_wakeup_common(rdp);
++              return do_nocb_deferred_wakeup_common(rdp);
++      return false;
+ }
+ void rcu_nocb_flush_deferred_wakeup(void)
+@@ -2523,8 +2529,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+       return false;
+ }
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+ {
++      return false;
+ }
+ static void rcu_spawn_cpu_nocb_kthread(int cpu)
+-- 
+2.30.1
+
index 63b095b0f44cd350c5cd0336f971d9883cd8215e..0aaab991384e18554bb06faeb17fcbcc75f6cce7 100644 (file)
@@ -35,3 +35,6 @@ asoc-qcom-remove-useless-debug-print.patch
 soundwire-debugfs-use-controller-id-instead-of-link_id.patch
 rsi-fix-tx-eapol-packet-handling-against-iwlwifi-ap.patch
 rsi-move-card-interrupt-handling-to-rx-thread.patch
+rcu-nocb-trigger-self-ipi-on-late-deferred-wake-up-b.patch
+entry-explicitly-flush-pending-rcuog-wakeup-before-l.patch
+entry-kvm-explicitly-flush-pending-rcuog-wakeup-befo.patch