--- /dev/null
+From 990fc5a559e40372fbddffe6827926dc4ff9ce36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 00:05:46 +0100
+Subject: rcu/nocb: Trigger self-IPI on late deferred wake up before user
+ resume
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit f8bb5cae9616224a39cbb399de382d36ac41df10 ]
+
+Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
+kthread (rcuog) to be serviced.
+
+Unfortunately the call to rcu_user_enter() is already past the last
+rescheduling opportunity before we resume to userspace or to guest mode.
+We may escape there with the woken task ignored.
+
+The ultimate resort to fix every callsite is to trigger a self-IPI
+(nohz_full depends on arch to implement arch_irq_work_raise()) that will
+trigger a reschedule on IRQ tail or guest exit.
+
+Eventually every site that wants a saner treatment will need to carefully
+place a call to rcu_nocb_flush_deferred_wakeup() before the last explicit
+need_resched() check upon resume.
+
+Fixes: 96d3fd0d315a (rcu: Break call_rcu() deadlock involving scheduler and perf)
+Reported-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210131230548.32970-4-frederic@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree.c | 21 ++++++++++++++++++++-
+ kernel/rcu/tree.h | 2 +-
+ kernel/rcu/tree_plugin.h | 25 ++++++++++++++++---------
+ 3 files changed, 37 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 4dfa9dd47223..71d3717ee66d 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -604,6 +604,18 @@ void rcu_idle_enter(void)
+ }
+
+ #ifdef CONFIG_NO_HZ_FULL
++
++/*
++ * An empty function that will trigger a reschedule on
++ * IRQ tail once IRQs get re-enabled on userspace resume.
++ */
++static void late_wakeup_func(struct irq_work *work)
++{
++}
++
++static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
++ IRQ_WORK_INIT(late_wakeup_func);
++
+ /**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+@@ -621,12 +633,19 @@ void rcu_user_enter(void)
+
+ lockdep_assert_irqs_disabled();
+
++ /*
++ * We may be past the last rescheduling opportunity in the entry code.
++ * Trigger a self IPI that will fire and reschedule once we resume to
++ * user/guest mode.
++ */
+ instrumentation_begin();
+- do_nocb_deferred_wakeup(rdp);
++ if (do_nocb_deferred_wakeup(rdp) && need_resched())
++ irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+ instrumentation_end();
+
+ rcu_eqs_enter(true);
+ }
++
+ #endif /* CONFIG_NO_HZ_FULL */
+
+ /*
+diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
+index c612f306fe89..564adf10c86f 100644
+--- a/kernel/rcu/tree.h
++++ b/kernel/rcu/tree.h
+@@ -438,7 +438,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+ unsigned long flags);
+ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
+ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
+ static void rcu_spawn_cpu_nocb_kthread(int cpu);
+ static void __init rcu_spawn_nocb_kthreads(void);
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index a71a4a272515..0eca302c59d7 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -1639,8 +1639,8 @@ bool rcu_is_nocb_cpu(int cpu)
+ * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock
+ * and this function releases it.
+ */
+-static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+- unsigned long flags)
++static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
++ unsigned long flags)
+ __releases(rdp->nocb_lock)
+ {
+ bool needwake = false;
+@@ -1651,7 +1651,7 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
+ TPS("AlreadyAwake"));
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+- return;
++ return false;
+ }
+ del_timer(&rdp->nocb_timer);
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+@@ -1664,6 +1664,8 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
+ if (needwake)
+ wake_up_process(rdp_gp->nocb_gp_kthread);
++
++ return needwake;
+ }
+
+ /*
+@@ -2155,20 +2157,23 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+ }
+
+ /* Do a deferred wakeup of rcu_nocb_kthread(). */
+-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
+ {
+ unsigned long flags;
+ int ndw;
++ int ret;
+
+ rcu_nocb_lock_irqsave(rdp, flags);
+ if (!rcu_nocb_need_deferred_wakeup(rdp)) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+- return;
++ return false;
+ }
+ ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+ WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+- wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
++ ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
++
++ return ret;
+ }
+
+ /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
+@@ -2184,10 +2189,11 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
+ * This means we do an inexact common-case check. Note that if
+ * we miss, ->nocb_timer will eventually clean things up.
+ */
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+ {
+ if (rcu_nocb_need_deferred_wakeup(rdp))
+- do_nocb_deferred_wakeup_common(rdp);
++ return do_nocb_deferred_wakeup_common(rdp);
++ return false;
+ }
+
+ void rcu_nocb_flush_deferred_wakeup(void)
+@@ -2527,8 +2533,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+ return false;
+ }
+
+-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
++static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
+ {
++ return false;
+ }
+
+ static void rcu_spawn_cpu_nocb_kthread(int cpu)
+--
+2.30.1
+