+++ /dev/null
-From foo@baz Mon Sep 24 09:35:12 CEST 2018
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-Date: Fri, 25 May 2018 19:23:09 -0700
-Subject: rcu: Fix grace-period hangs due to race with CPU offline
-
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-
-[ Upstream commit 1e64b15a4b102e1cd059d4d798b7a78f93341333 ]
-
-Without special fail-safe quiescent-state-propagation checks, grace-period
-hangs can result from the following scenario:
-
-1. CPU 1 goes offline.
-
-2. Because CPU 1 is the only CPU in the system blocking the current
- grace period, the grace period ends as soon as
- rcu_cleanup_dying_idle_cpu()'s call to rcu_report_qs_rnp()
- returns.
-
-3. At this point, the leaf rcu_node structure's ->lock is no longer
- held: rcu_report_qs_rnp() has released it, as it must in order
- to awaken the RCU grace-period kthread.
-
-4. At this point, that same leaf rcu_node structure's ->qsmaskinitnext
- field still records CPU 1 as being online. This is absolutely
- necessary because the scheduler uses RCU (in this case on the
- wake-up path while awakening RCU's grace-period kthread), and
- ->qsmaskinitnext contains RCU's idea as to which CPUs are online.
- Therefore, invoking rcu_report_qs_rnp() after clearing CPU 1's
- bit from ->qsmaskinitnext would result in a lockdep-RCU splat
- due to RCU being used from an offline CPU.
-
-5. RCU's grace-period kthread awakens, sees that the old grace period
- has completed and that a new one is needed. It therefore starts
- a new grace period, but because CPU 1's leaf rcu_node structure's
- ->qsmaskinitnext field still shows CPU 1 as being online, this new
- grace period is initialized to wait for a quiescent state from the
- now-offline CPU 1.
-
-6. Without the fail-safe force-quiescent-state checks, there would
- be no quiescent state from the now-offline CPU 1, which would
- eventually result in RCU CPU stall warnings and memory exhaustion.
-
-It would be good to get rid of the special fail-safe quiescent-state
-propagation checks, and thus it would be good to fix things so that
-the above scenario cannot happen. This commit therefore adds a new
-->ofl_lock to the rcu_state structure. This lock is held by rcu_gp_init()
-across the application of buffered online and offline operations to the
-rcu_node tree, and it is also held by rcu_cleanup_dying_idle_cpu()
-when buffering a new offline operation. This prevents rcu_gp_init()
-from acquiring the leaf rcu_node structure's ->lock during the interval
-between when rcu_cleanup_dying_idle_cpu() invokes rcu_report_qs_rnp(),
-which releases ->lock, and the re-acquisition of that same lock.
-This in turn prevents the failure scenario outlined above, and should
-eventually allow removal of the offline-CPU checks from the
-force-quiescent-state code path.
-
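-The locking pattern this change introduces can be mirrored in a
-self-contained userspace sketch (illustration only, not kernel code:
-pthread_mutex_t stands in for spinlock_t, a single leaf rcu_node
-stands in for the whole tree, and only the field names follow the
-patch):
-
-    #include <pthread.h>
-    #include <stdint.h>
-    #include <stdio.h>
-
-    /* Simplified stand-ins for the kernel structures the patch touches. */
-    struct rcu_node {
-        pthread_mutex_t lock;      /* the leaf rcu_node ->lock */
-        uint64_t qsmaskinit;       /* CPUs the current GP waits on */
-        uint64_t qsmaskinitnext;   /* RCU's view of which CPUs are online */
-    };
-
-    struct rcu_state {
-        pthread_mutex_t ofl_lock;  /* the lock this commit adds */
-        struct rcu_node node;      /* one leaf is enough for the sketch */
-    };
-
-    static struct rcu_state rsp = {
-        .ofl_lock = PTHREAD_MUTEX_INITIALIZER,
-        .node = { .lock = PTHREAD_MUTEX_INITIALIZER,
-                  .qsmaskinit = 0x3, .qsmaskinitnext = 0x3 },
-    };
-
-    /* GP pre-initialization: apply buffered online/offline operations.
-     * Holding ->ofl_lock across the scan means no offline operation can
-     * slip into the window between the offline path's release and
-     * re-acquisition of the leaf ->lock. */
-    static void gp_init_apply_buffered_events(void)
-    {
-        pthread_mutex_lock(&rsp.ofl_lock);
-        pthread_mutex_lock(&rsp.node.lock);
-        rsp.node.qsmaskinit = rsp.node.qsmaskinitnext;
-        pthread_mutex_unlock(&rsp.node.lock);
-        pthread_mutex_unlock(&rsp.ofl_lock);
-    }
-
-    /* Offline path: buffer the offline of @cpu, also under ->ofl_lock. */
-    static void cleanup_dying_idle_cpu(int cpu)
-    {
-        pthread_mutex_lock(&rsp.ofl_lock);
-        pthread_mutex_lock(&rsp.node.lock);
-        rsp.node.qsmaskinitnext &= ~(1ULL << cpu);
-        pthread_mutex_unlock(&rsp.node.lock);
-        pthread_mutex_unlock(&rsp.ofl_lock);
-    }
-
-    int main(void)
-    {
-        cleanup_dying_idle_cpu(1);
-        gp_init_apply_buffered_events();
-        printf("qsmaskinit after GP init: %#llx\n",
-               (unsigned long long)rsp.node.qsmaskinit);
-        return 0;
-    }
-
-Build with "cc -pthread sketch.c". Because both paths nest the leaf
-->lock inside ->ofl_lock, the GP-init scan and the offline buffering
-are fully serialized, which is exactly the guarantee described above.
-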
-Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/rcu/tree.c | 6 ++++++
- kernel/rcu/tree.h | 4 ++++
- 2 files changed, 10 insertions(+)
-
---- a/kernel/rcu/tree.c
-+++ b/kernel/rcu/tree.c
-@@ -102,6 +102,7 @@ struct rcu_state sname##_state = { \
- .abbr = sabbr, \
- .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
- .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-+ .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
- }
-
- RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-@@ -1996,11 +1997,13 @@ static bool rcu_gp_init(struct rcu_state
- */
- rcu_for_each_leaf_node(rsp, rnp) {
- rcu_gp_slow(rsp, gp_preinit_delay);
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irq_rcu_node(rnp);
- if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
- !rnp->wait_blkd_tasks) {
- /* Nothing to do on this leaf rcu_node structure. */
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- continue;
- }
-
-@@ -2035,6 +2038,7 @@ static bool rcu_gp_init(struct rcu_state
- }
-
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- /*
-@@ -3837,9 +3841,11 @@ static void rcu_cleanup_dying_idle_cpu(i
-
- /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
- mask = rdp->grpmask;
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
- rnp->qsmaskinitnext &= ~mask;
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- /*
---- a/kernel/rcu/tree.h
-+++ b/kernel/rcu/tree.h
-@@ -389,6 +389,10 @@ struct rcu_state {
- const char *name; /* Name of structure. */
- char abbr; /* Abbreviated name. */
- struct list_head flavors; /* List of RCU flavors. */
-+
-+ spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
-+ /* Synchronize offline with */
-+ /* GP pre-initialization. */
- };
-
- /* Values for rcu_state structure's gp_flags field. */
gpiolib-mark-gpio_suffixes-array-with-__maybe_unused.patch
mfd-88pm860x-i2c-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
input-rohm_bu21023-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
-rcu-fix-grace-period-hangs-due-to-race-with-cpu-offline.patch
drm-amdkfd-fix-error-codes-in-kfd_get_process.patch
rtc-bq4802-add-error-handling-for-devm_ioremap.patch
alsa-pcm-fix-snd_interval_refine-first-last-with-open-min-max.patch
+++ /dev/null
-From foo@baz Mon Sep 24 09:45:39 CEST 2018
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-Date: Fri, 25 May 2018 19:23:09 -0700
-Subject: rcu: Fix grace-period hangs due to race with CPU offline
-
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-
-[ Upstream commit 1e64b15a4b102e1cd059d4d798b7a78f93341333 ]
-
-Without special fail-safe quiescent-state-propagation checks, grace-period
-hangs can result from the following scenario:
-
-1. CPU 1 goes offline.
-
-2. Because CPU 1 is the only CPU in the system blocking the current
- grace period, the grace period ends as soon as
- rcu_cleanup_dying_idle_cpu()'s call to rcu_report_qs_rnp()
- returns.
-
-3. At this point, the leaf rcu_node structure's ->lock is no longer
- held: rcu_report_qs_rnp() has released it, as it must in order
- to awaken the RCU grace-period kthread.
-
-4. At this point, that same leaf rcu_node structure's ->qsmaskinitnext
- field still records CPU 1 as being online. This is absolutely
- necessary because the scheduler uses RCU (in this case on the
- wake-up path while awakening RCU's grace-period kthread), and
- ->qsmaskinitnext contains RCU's idea as to which CPUs are online.
- Therefore, invoking rcu_report_qs_rnp() after clearing CPU 1's
- bit from ->qsmaskinitnext would result in a lockdep-RCU splat
- due to RCU being used from an offline CPU.
-
-5. RCU's grace-period kthread awakens, sees that the old grace period
- has completed and that a new one is needed. It therefore starts
- a new grace period, but because CPU 1's leaf rcu_node structure's
- ->qsmaskinitnext field still shows CPU 1 as being online, this new
- grace period is initialized to wait for a quiescent state from the
- now-offline CPU 1.
-
-6. Without the fail-safe force-quiescent-state checks, there would
- be no quiescent state from the now-offline CPU 1, which would
- eventually result in RCU CPU stall warnings and memory exhaustion.
-
-It would be good to get rid of the special fail-safe quiescent-state
-propagation checks, and thus it would be good to fix things so that
-the above scenario cannot happen. This commit therefore adds a new
-->ofl_lock to the rcu_state structure. This lock is held by rcu_gp_init()
-across the application of buffered online and offline operations to the
-rcu_node tree, and it is also held by rcu_cleanup_dying_idle_cpu()
-when buffering a new offline operation. This prevents rcu_gp_init()
-from acquiring the leaf rcu_node structure's ->lock during the interval
-between when rcu_cleanup_dying_idle_cpu() invokes rcu_report_qs_rnp(),
-which releases ->lock, and the re-acquisition of that same lock.
-This in turn prevents the failure scenario outlined above, and should
-eventually allow removal of the offline-CPU checks from the
-force-quiescent-state code path.
-
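-Step 4's ordering constraint can be made concrete with a minimal
-userspace sketch (illustration only; rcu_used_from_cpu() and
-report_qs_then_mark_offline() are invented stand-ins for the real code
-paths): the quiescent state must be reported, and the wakeup's use of
-RCU finished, while the CPU's ->qsmaskinitnext bit is still set.
-
-    #include <assert.h>
-    #include <stdint.h>
-    #include <stdio.h>
-
-    /* RCU's view of which CPUs are online: here, CPUs 0 and 1. */
-    static uint64_t qsmaskinitnext = 0x3;
-
-    /* Stand-in for any use of RCU on the kthread wakeup path: it is
-     * only legal while this CPU is still marked online. */
-    static void rcu_used_from_cpu(int cpu)
-    {
-        /* lockdep-RCU's complaint, in miniature */
-        assert(qsmaskinitnext & (1ULL << cpu));
-    }
-
-    /* Report the QS first (the wakeup uses RCU); clear the bit after. */
-    static void report_qs_then_mark_offline(int cpu)
-    {
-        rcu_used_from_cpu(cpu);            /* still online: OK */
-        qsmaskinitnext &= ~(1ULL << cpu);  /* now buffer the offline */
-    }
-
-    int main(void)
-    {
-        report_qs_then_mark_offline(1);
-        printf("CPU 1 buffered offline, mask now %#llx\n",
-               (unsigned long long)qsmaskinitnext);
-        return 0;
-    }
-
-Reversing the two statements in report_qs_then_mark_offline() trips
-the assert, just as lockdep-RCU would splat in the kernel.
-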
-Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/rcu/tree.c | 6 ++++++
- kernel/rcu/tree.h | 4 ++++
- 2 files changed, 10 insertions(+)
-
---- a/kernel/rcu/tree.c
-+++ b/kernel/rcu/tree.c
-@@ -102,6 +102,7 @@ struct rcu_state sname##_state = { \
- .abbr = sabbr, \
- .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
- .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-+ .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
- }
-
- RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-@@ -1925,11 +1926,13 @@ static bool rcu_gp_init(struct rcu_state
- */
- rcu_for_each_leaf_node(rsp, rnp) {
- rcu_gp_slow(rsp, gp_preinit_delay);
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irq_rcu_node(rnp);
- if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
- !rnp->wait_blkd_tasks) {
- /* Nothing to do on this leaf rcu_node structure. */
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- continue;
- }
-
-@@ -1964,6 +1967,7 @@ static bool rcu_gp_init(struct rcu_state
- }
-
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- /*
-@@ -3725,9 +3729,11 @@ static void rcu_cleanup_dying_idle_cpu(i
-
- /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
- mask = rdp->grpmask;
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
- rnp->qsmaskinitnext &= ~mask;
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- /*
---- a/kernel/rcu/tree.h
-+++ b/kernel/rcu/tree.h
-@@ -384,6 +384,10 @@ struct rcu_state {
- const char *name; /* Name of structure. */
- char abbr; /* Abbreviated name. */
- struct list_head flavors; /* List of RCU flavors. */
-+
-+ spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
-+ /* Synchronize offline with */
-+ /* GP pre-initialization. */
- };
-
- /* Values for rcu_state structure's gp_flags field. */
rcutorture-use-monotonic-timestamp-for-stall-detection.patch
mfd-88pm860x-i2c-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
input-rohm_bu21023-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
-rcu-fix-grace-period-hangs-due-to-race-with-cpu-offline.patch
drm-amdkfd-fix-kernel-queue-64-bit-doorbell-offset-calculation.patch
drm-amdkfd-fix-error-codes-in-kfd_get_process.patch
rtc-bq4802-add-error-handling-for-devm_ioremap.patch
+++ /dev/null
-From foo@baz Mon Sep 24 09:32:39 CEST 2018
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-Date: Fri, 25 May 2018 19:23:09 -0700
-Subject: rcu: Fix grace-period hangs due to race with CPU offline
-
-From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-
-[ Upstream commit 1e64b15a4b102e1cd059d4d798b7a78f93341333 ]
-
-Without special fail-safe quiescent-state-propagation checks, grace-period
-hangs can result from the following scenario:
-
-1. CPU 1 goes offline.
-
-2. Because CPU 1 is the only CPU in the system blocking the current
- grace period, the grace period ends as soon as
- rcu_cleanup_dying_idle_cpu()'s call to rcu_report_qs_rnp()
- returns.
-
-3. At this point, the leaf rcu_node structure's ->lock is no longer
- held: rcu_report_qs_rnp() has released it, as it must in order
- to awaken the RCU grace-period kthread.
-
-4. At this point, that same leaf rcu_node structure's ->qsmaskinitnext
- field still records CPU 1 as being online. This is absolutely
- necessary because the scheduler uses RCU (in this case on the
- wake-up path while awakening RCU's grace-period kthread), and
- ->qsmaskinitnext contains RCU's idea as to which CPUs are online.
- Therefore, invoking rcu_report_qs_rnp() after clearing CPU 1's
- bit from ->qsmaskinitnext would result in a lockdep-RCU splat
- due to RCU being used from an offline CPU.
-
-5. RCU's grace-period kthread awakens, sees that the old grace period
- has completed and that a new one is needed. It therefore starts
- a new grace period, but because CPU 1's leaf rcu_node structure's
- ->qsmaskinitnext field still shows CPU 1 as being online, this new
- grace period is initialized to wait for a quiescent state from the
- now-offline CPU 1.
-
-6. Without the fail-safe force-quiescent-state checks, there would
- be no quiescent state from the now-offline CPU 1, which would
- eventually result in RCU CPU stall warnings and memory exhaustion.
-
-It would be good to get rid of the special fail-safe quiescent-state
-propagation checks, and thus it would be good to fix things so that
-the above scenario cannot happen. This commit therefore adds a new
-->ofl_lock to the rcu_state structure. This lock is held by rcu_gp_init()
-across the application of buffered online and offline operations to the
-rcu_node tree, and it is also held by rcu_cleanup_dying_idle_cpu()
-when buffering a new offline operation. This prevents rcu_gp_init()
-from acquiring the leaf rcu_node structure's ->lock during the interval
-between when rcu_cleanup_dying_idle_cpu() invokes rcu_report_qs_rnp(),
-which releases ->lock, and the re-acquisition of that same lock.
-This in turn prevents the failure scenario outlined above, and should
-eventually allow removal of the offline-CPU checks from the
-force-quiescent-state code path.
-
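-The hang of steps 5 and 6 can likewise be simulated in a few lines
-(illustration only; the masks are simplified stand-ins): a grace
-period initialized from a stale ->qsmaskinitnext waits on a bit that
-no remaining CPU will ever clear.
-
-    #include <stdint.h>
-    #include <stdio.h>
-
-    int main(void)
-    {
-        /* Step 5: CPU 1 is already gone, but the new GP is set up
-         * from a ->qsmaskinitnext that still shows it online. */
-        uint64_t qsmaskinitnext = 0x3;   /* stale: includes CPU 1 */
-        uint64_t actually_online = 0x1;  /* only CPU 0 remains */
-
-        uint64_t qsmask = qsmaskinitnext;  /* GP init: who must report */
-        qsmask &= ~actually_online;        /* live CPUs report their QS */
-
-        if (qsmask)                        /* step 6: bit 1 never clears */
-            printf("GP never completes; still waiting on %#llx\n",
-                   (unsigned long long)qsmask);
-        return 0;
-    }
-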
-Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/rcu/tree.c | 6 ++++++
- kernel/rcu/tree.h | 4 ++++
- 2 files changed, 10 insertions(+)
-
---- a/kernel/rcu/tree.c
-+++ b/kernel/rcu/tree.c
-@@ -102,6 +102,7 @@ struct rcu_state sname##_state = { \
- .abbr = sabbr, \
- .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
- .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-+ .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
- }
-
- RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-@@ -1966,11 +1967,13 @@ static bool rcu_gp_init(struct rcu_state
- */
- rcu_for_each_leaf_node(rsp, rnp) {
- rcu_gp_slow(rsp, gp_preinit_delay);
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irq_rcu_node(rnp);
- if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
- !rnp->wait_blkd_tasks) {
- /* Nothing to do on this leaf rcu_node structure. */
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- continue;
- }
-
-@@ -2005,6 +2008,7 @@ static bool rcu_gp_init(struct rcu_state
- }
-
- raw_spin_unlock_irq_rcu_node(rnp);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- /*
-@@ -3915,9 +3919,11 @@ static void rcu_cleanup_dying_idle_cpu(i
-
- /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
- mask = rdp->grpmask;
-+ spin_lock(&rsp->ofl_lock);
- raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
- rnp->qsmaskinitnext &= ~mask;
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-+ spin_unlock(&rsp->ofl_lock);
- }
-
- void rcu_report_dead(unsigned int cpu)
---- a/kernel/rcu/tree.h
-+++ b/kernel/rcu/tree.h
-@@ -550,6 +550,10 @@ struct rcu_state {
- const char *name; /* Name of structure. */
- char abbr; /* Abbreviated name. */
- struct list_head flavors; /* List of RCU flavors. */
-+
-+ spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
-+ /* Synchronize offline with */
-+ /* GP pre-initialization. */
- };
-
- /* Values for rcu_state structure's gp_flags field. */
gpiolib-mark-gpio_suffixes-array-with-__maybe_unused.patch
mfd-88pm860x-i2c-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
input-rohm_bu21023-switch-to-i2c_lock_bus-...-i2c_lock_segment.patch
-rcu-fix-grace-period-hangs-due-to-race-with-cpu-offline.patch
drm-amdkfd-fix-error-codes-in-kfd_get_process.patch
rtc-bq4802-add-error-handling-for-devm_ioremap.patch
alsa-pcm-fix-snd_interval_refine-first-last-with-open-min-max.patch