git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 13 Sep 2025 11:48:35 +0000 (13:48 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 13 Sep 2025 11:48:35 +0000 (13:48 +0200)
added patches:
rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch

queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch [new file with mode: 0644]
queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch [new file with mode: 0644]
queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch b/queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
new file mode 100644 (file)
index 0000000..e5a3550
--- /dev/null
@@ -0,0 +1,157 @@
+From 1612160b91272f5b1596f499584d6064bf5be794 Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney" <paulmck@kernel.org>
+Date: Fri, 2 Feb 2024 11:49:06 -0800
+Subject: rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+commit 1612160b91272f5b1596f499584d6064bf5be794 upstream.
+
+Holding a mutex across synchronize_rcu_tasks() and acquiring
+that same mutex in code called from do_exit() after its call to
+exit_tasks_rcu_start() but before its call to exit_tasks_rcu_stop()
+results in deadlock.  This is by design, because tasks that are far
+enough into do_exit() are no longer present on the tasks list, making
+it a bit difficult for RCU Tasks to find them, let alone wait on them
+to do a voluntary context switch.  However, such deadlocks are becoming
+more frequent.  In addition, lockdep currently does not detect such
+deadlocks and they can be difficult to reproduce.
+
+In addition, if a task voluntarily context switches during that time
+(for example, if it blocks acquiring a mutex), then this task is in an
+RCU Tasks quiescent state.  And with some adjustments, RCU Tasks could
+just as well take advantage of that fact.
+
+This commit therefore eliminates these deadlocks by replacing the
+SRCU-based wait for do_exit() completion with per-CPU lists of tasks
+currently exiting.  A given task will be on one of these per-CPU lists for
+the same period of time that this task would previously have been in the
+previous SRCU read-side critical section.  These lists enable RCU Tasks
+to find the tasks that have already been removed from the tasks list,
+but that must nevertheless be waited upon.
+
+The RCU Tasks grace period gathers any of these do_exit() tasks that it
+must wait on, and adds them to the list of holdouts.  Per-CPU locking
+and get_task_struct() are used to synchronize addition to and removal
+from these lists.
+
+Link: https://lore.kernel.org/all/20240118021842.290665-1-chenzhongjin@huawei.com/
+
+Reported-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Reported-by: Yang Jihong <yangjihong1@huawei.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Tested-by: Yang Jihong <yangjihong1@huawei.com>
+Tested-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Cc: Tahera Fahimi <taherafahimi@linux.microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tasks.h |   44 ++++++++++++++++++++++++++++----------------
+ 1 file changed, 28 insertions(+), 16 deletions(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -150,8 +150,6 @@ static struct rcu_tasks rt_name =
+ }
+ #ifdef CONFIG_TASKS_RCU
+-/* Track exiting tasks in order to allow them to be waited for. */
+-DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
+ /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
+ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
+@@ -879,10 +877,12 @@ static void rcu_tasks_wait_gp(struct rcu
+ //    number of voluntary context switches, and add that task to the
+ //    holdout list.
+ // rcu_tasks_postscan():
+-//    Invoke synchronize_srcu() to ensure that all tasks that were
+-//    in the process of exiting (and which thus might not know to
+-//    synchronize with this RCU Tasks grace period) have completed
+-//    exiting.
++//    Gather per-CPU lists of tasks in do_exit() to ensure that all
++//    tasks that were in the process of exiting (and which thus might
++//    not know to synchronize with this RCU Tasks grace period) have
++//    completed exiting.  The synchronize_rcu() in rcu_tasks_postgp()
++//    will take care of any tasks stuck in the non-preemptible region
++//    of do_exit() following its call to exit_tasks_rcu_stop().
+ // check_all_holdout_tasks(), repeatedly until holdout list is empty:
+ //    Scans the holdout list, attempting to identify a quiescent state
+ //    for each task on the list.  If there is a quiescent state, the
+@@ -895,8 +895,10 @@ static void rcu_tasks_wait_gp(struct rcu
+ //    with interrupts disabled.
+ //
+ // For each exiting task, the exit_tasks_rcu_start() and
+-// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
+-// read-side critical sections waited for by rcu_tasks_postscan().
++// exit_tasks_rcu_finish() functions add and remove, respectively, the
++// current task to a per-CPU list of tasks that rcu_tasks_postscan() must
++// wait on.  This is necessary because rcu_tasks_postscan() must wait on
++// tasks that have already been removed from the global list of tasks.
+ //
+ // Pre-grace-period update-side code is ordered before the grace
+ // via the raw_spin_lock.*rcu_node().  Pre-grace-period read-side code
+@@ -960,9 +962,13 @@ static void rcu_tasks_pertask(struct tas
+       }
+ }
++void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
++DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
++
+ /* Processing between scanning taskslist and draining the holdout list. */
+ static void rcu_tasks_postscan(struct list_head *hop)
+ {
++      int cpu;
+       int rtsi = READ_ONCE(rcu_task_stall_info);
+       if (!IS_ENABLED(CONFIG_TINY_RCU)) {
+@@ -976,9 +982,9 @@ static void rcu_tasks_postscan(struct li
+        * this, divide the fragile exit path part in two intersecting
+        * read side critical sections:
+        *
+-       * 1) An _SRCU_ read side starting before calling exit_notify(),
+-       *    which may remove the task from the tasklist, and ending after
+-       *    the final preempt_disable() call in do_exit().
++       * 1) A task_struct list addition before calling exit_notify(),
++       *    which may remove the task from the tasklist, with the
++       *    removal after the final preempt_disable() call in do_exit().
+        *
+        * 2) An _RCU_ read side starting with the final preempt_disable()
+        *    call in do_exit() and ending with the final call to schedule()
+@@ -987,7 +993,17 @@ static void rcu_tasks_postscan(struct li
+        * This handles the part 1). And postgp will handle part 2) with a
+        * call to synchronize_rcu().
+        */
+-      synchronize_srcu(&tasks_rcu_exit_srcu);
++
++      for_each_possible_cpu(cpu) {
++              struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
++              struct task_struct *t;
++
++              raw_spin_lock_irq_rcu_node(rtpcp);
++              list_for_each_entry(t, &rtpcp->rtp_exit_list, rcu_tasks_exit_list)
++                      if (list_empty(&t->rcu_tasks_holdout_list))
++                              rcu_tasks_pertask(t, hop);
++              raw_spin_unlock_irq_rcu_node(rtpcp);
++      }
+       if (!IS_ENABLED(CONFIG_TINY_RCU))
+               del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
+@@ -1055,7 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_
+        *
+        * In addition, this synchronize_rcu() waits for exiting tasks
+        * to complete their final preempt_disable() region of execution,
+-       * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
+        * enforcing the whole region before tasklist removal until
+        * the final schedule() with TASK_DEAD state to be an RCU TASKS
+        * read side critical section.
+@@ -1063,9 +1078,6 @@ static void rcu_tasks_postgp(struct rcu_
+       synchronize_rcu();
+ }
+-void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
+-DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
+-
+ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
+ {
+ #ifndef CONFIG_TINY_RCU
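
The deadlock this patch removes has a simple shape that can be reproduced
outside the kernel.  The following is a minimal pthread sketch, offered
purely as illustration: grace_period() stands in for synchronize_rcu_tasks(),
the exit-window helpers stand in for exit_tasks_rcu_start() and
exit_tasks_rcu_stop(), and every identifier is hypothetical rather than
kernel API.  The updater holds the mutex across the grace period while the
exiter, already inside its exit window, needs that same mutex, so neither
can proceed.

/* Userspace analog of the RCU-Tasks/do_exit() deadlock; a sketch,
 * not kernel code.  grace_period() blocks until no thread is inside
 * its "exit window". */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;  /* the shared mutex */
static pthread_mutex_t gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gp_cv = PTHREAD_COND_INITIALIZER;
static pthread_barrier_t barrier;
static int nexiting;    /* threads currently inside their exit window */

static void exit_window_enter(void)     /* ~ exit_tasks_rcu_start() */
{
	pthread_mutex_lock(&gp_lock);
	nexiting++;
	pthread_mutex_unlock(&gp_lock);
}

static void exit_window_leave(void)     /* ~ exit_tasks_rcu_stop() */
{
	pthread_mutex_lock(&gp_lock);
	if (--nexiting == 0)
		pthread_cond_broadcast(&gp_cv);
	pthread_mutex_unlock(&gp_lock);
}

static void grace_period(void)          /* ~ synchronize_rcu_tasks() */
{
	pthread_mutex_lock(&gp_lock);
	while (nexiting)
		pthread_cond_wait(&gp_cv, &gp_lock);
	pthread_mutex_unlock(&gp_lock);
}

static void *updater(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m);          /* hold the mutex ... */
	pthread_barrier_wait(&barrier);
	grace_period();                  /* ... across the grace period */
	pthread_mutex_unlock(&m);
	return NULL;
}

static void *exiter(void *arg)
{
	(void)arg;
	exit_window_enter();             /* like a task late in do_exit() */
	pthread_barrier_wait(&barrier);
	pthread_mutex_lock(&m);          /* needs m to make progress */
	pthread_mutex_unlock(&m);
	exit_window_leave();
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_barrier_init(&barrier, NULL, 2);
	pthread_create(&a, NULL, updater, NULL);
	pthread_create(&b, NULL, exiter, NULL);
	pthread_join(a, NULL);   /* hangs: a waits on b's exit window,
				    b waits on a's mutex */
	pthread_join(b, NULL);
	puts("unreachable");
	return 0;
}

Compiled with cc -pthread, the program hangs at the first pthread_join(),
which is the deadlock in miniature.  The per-CPU exit lists let the grace
period find and wait on such tasks directly instead of waiting for them to
leave the exit window.
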
diff --git a/queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch b/queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
new file mode 100644 (file)
index 0000000..1fa5c78
--- /dev/null
@@ -0,0 +1,94 @@
+From 6b70399f9ef3809f6e308fd99dd78b072c1bd05c Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney" <paulmck@kernel.org>
+Date: Fri, 2 Feb 2024 11:28:45 -0800
+Subject: rcu-tasks: Maintain lists to eliminate RCU-tasks/do_exit() deadlocks
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+commit 6b70399f9ef3809f6e308fd99dd78b072c1bd05c upstream.
+
+This commit continues the elimination of deadlocks involving do_exit()
+and RCU tasks by causing exit_tasks_rcu_start() to add the current
+task to a per-CPU list and causing exit_tasks_rcu_stop() to remove the
+current task from whatever list it is on.  These lists will be used to
+track tasks that are exiting, while still accounting for any RCU-tasks
+quiescent states that these tasks pass through.
+
+[ paulmck: Apply Frederic Weisbecker feedback. ]
+
+Link: https://lore.kernel.org/all/20240118021842.290665-1-chenzhongjin@huawei.com/
+
+Reported-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Reported-by: Yang Jihong <yangjihong1@huawei.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Tested-by: Yang Jihong <yangjihong1@huawei.com>
+Tested-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Cc: Tahera Fahimi <taherafahimi@linux.microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tasks.h |   43 +++++++++++++++++++++++++++++++++----------
+ 1 file changed, 33 insertions(+), 10 deletions(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1175,25 +1175,48 @@ struct task_struct *get_rcu_tasks_gp_kth
+ EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
+ /*
+- * Contribute to protect against tasklist scan blind spot while the
+- * task is exiting and may be removed from the tasklist. See
+- * corresponding synchronize_srcu() for further details.
++ * Protect against tasklist scan blind spot while the task is exiting and
++ * may be removed from the tasklist.  Do this by adding the task to yet
++ * another list.
++ *
++ * Note that the task will remove itself from this list, so there is no
++ * need for get_task_struct(), except in the case where rcu_tasks_pertask()
++ * adds it to the holdout list, in which case rcu_tasks_pertask() supplies
++ * the needed get_task_struct().
+  */
+-void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
++void exit_tasks_rcu_start(void)
+ {
+-      current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
++      unsigned long flags;
++      struct rcu_tasks_percpu *rtpcp;
++      struct task_struct *t = current;
++
++      WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
++      preempt_disable();
++      rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
++      t->rcu_tasks_exit_cpu = smp_processor_id();
++      raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
++      if (!rtpcp->rtp_exit_list.next)
++              INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
++      list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
++      raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
++      preempt_enable();
+ }
+ /*
+- * Contribute to protect against tasklist scan blind spot while the
+- * task is exiting and may be removed from the tasklist. See
+- * corresponding synchronize_srcu() for further details.
++ * Remove the task from the "yet another list" because do_exit() is now
++ * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
+  */
+-void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
++void exit_tasks_rcu_stop(void)
+ {
++      unsigned long flags;
++      struct rcu_tasks_percpu *rtpcp;
+       struct task_struct *t = current;
+-      __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
++      WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
++      rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu);
++      raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
++      list_del_init(&t->rcu_tasks_exit_list);
++      raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ }
+ /*
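
To make the new list protocol concrete, here is a hedged userspace sketch,
with hypothetical names throughout and pthread mutexes standing in for the
per-CPU raw rcu_node locks: each bucket plays the role of one CPU's
rcu_tasks_percpu structure, and a departing task records which bucket it
enqueued on so that removal takes the same lock even if the task migrates
in between, mirroring t->rcu_tasks_exit_cpu above.

/* Userspace sketch of the per-CPU exit-list protocol; every
 * identifier is hypothetical, not kernel API. */
#include <pthread.h>

#define NBUCKETS 4                    /* stand-in for the number of CPUs */

struct list_node { struct list_node *prev, *next; };

struct bucket {                       /* ~ struct rcu_tasks_percpu */
	pthread_mutex_t lock;
	struct list_node exit_list;   /* ~ rtpcp->rtp_exit_list */
};

struct task {                         /* ~ task_struct (fragment) */
	struct list_node exit_node;   /* ~ t->rcu_tasks_exit_list */
	int exit_bucket;              /* ~ t->rcu_tasks_exit_cpu */
};

static struct bucket buckets[NBUCKETS];

static void list_insert(struct list_node *n, struct list_node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_remove_init(struct list_node *n)   /* ~ list_del_init() */
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->prev = n->next = n;
}

static void buckets_init(void)
{
	for (int i = 0; i < NBUCKETS; i++) {
		pthread_mutex_init(&buckets[i].lock, NULL);
		buckets[i].exit_list.prev = &buckets[i].exit_list;
		buckets[i].exit_list.next = &buckets[i].exit_list;
	}
}

/* ~ exit_tasks_rcu_start(): enqueue on the current bucket and remember
 * which one it was, because the task may migrate before dequeueing. */
static void exit_start(struct task *t, int cpu)
{
	struct bucket *b = &buckets[cpu % NBUCKETS];

	t->exit_bucket = cpu % NBUCKETS;
	pthread_mutex_lock(&b->lock);
	list_insert(&t->exit_node, &b->exit_list);
	pthread_mutex_unlock(&b->lock);
}

/* ~ exit_tasks_rcu_stop(): take the recorded bucket's lock, not the
 * current CPU's, so removal always pairs with the insertion's lock. */
static void exit_stop(struct task *t)
{
	struct bucket *b = &buckets[t->exit_bucket];

	pthread_mutex_lock(&b->lock);
	list_remove_init(&t->exit_node);
	pthread_mutex_unlock(&b->lock);
}

A grace-period scan can then walk each bucket under that bucket's lock,
which is what the reworked rcu_tasks_postscan() in the companion patch does.
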
diff --git a/queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch b/queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch
new file mode 100644 (file)
index 0000000..fa93367
--- /dev/null
@@ -0,0 +1,74 @@
+From 0bb11a372fc8d7006b4d0f42a2882939747bdbff Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney" <paulmck@kernel.org>
+Date: Thu, 1 Feb 2024 06:10:26 -0800
+Subject: rcu-tasks: Maintain real-time response in rcu_tasks_postscan()
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+commit 0bb11a372fc8d7006b4d0f42a2882939747bdbff upstream.
+
+The current code will scan the entirety of each per-CPU list of exiting
+tasks in ->rtp_exit_list with interrupts disabled.  This is normally just
+fine, because each CPU typically won't have very many tasks in this state.
+However, if a large number of tasks block late in do_exit(), these lists
+could be arbitrarily long.  Low probability, perhaps, but it really
+could happen.
+
+This commit therefore occasionally re-enables interrupts while traversing
+these lists, inserting a dummy element to hold the current place in the
+list.  In kernels built with CONFIG_PREEMPT_RT=y, this re-enabling happens
+after each list element is processed, otherwise every one-to-two jiffies.
+
+[ paulmck: Apply Frederic Weisbecker feedback. ]
+
+Link: https://lore.kernel.org/all/ZdeI_-RfdLR8jlsm@localhost.localdomain/
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Sebastian Siewior <bigeasy@linutronix.de>
+Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Cc: Tahera Fahimi <taherafahimi@linux.microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tasks.h |   22 +++++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -995,13 +995,33 @@ static void rcu_tasks_postscan(struct li
+        */
+       for_each_possible_cpu(cpu) {
++              unsigned long j = jiffies + 1;
+               struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
+               struct task_struct *t;
++              struct task_struct *t1;
++              struct list_head tmp;
+               raw_spin_lock_irq_rcu_node(rtpcp);
+-              list_for_each_entry(t, &rtpcp->rtp_exit_list, rcu_tasks_exit_list)
++              list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) {
+                       if (list_empty(&t->rcu_tasks_holdout_list))
+                               rcu_tasks_pertask(t, hop);
++
++                      // RT kernels need frequent pauses, otherwise
++                      // pause at least once per pair of jiffies.
++                      if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j))
++                              continue;
++
++                      // Keep our place in the list while pausing.
++                      // Nothing else traverses this list, so adding a
++                      // bare list_head is OK.
++                      list_add(&tmp, &t->rcu_tasks_exit_list);
++                      raw_spin_unlock_irq_rcu_node(rtpcp);
++                      cond_resched(); // For CONFIG_PREEMPT=n kernels
++                      raw_spin_lock_irq_rcu_node(rtpcp);
++                      t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list);
++                      list_del(&tmp);
++                      j = jiffies + 1;
++              }
+               raw_spin_unlock_irq_rcu_node(rtpcp);
+       }
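
The placeholder trick above generalizes to any locked list scan that has a
single scanner.  Below is a hedged userspace sketch of the same cursor
pattern, with hypothetical names: splice a bare node in after the element
just processed, drop the lock, then on reacquire resume from whatever now
follows the cursor and unhook it, which is exactly the role the patch's
on-stack 'tmp' element plays.

/* Userspace sketch of pausing a locked list scan without losing the
 * current position; a sketch, not kernel code.  Assumes exactly one
 * scanner, so no other traversal ever sees the bare cursor node. */
#include <pthread.h>
#include <sched.h>

struct list_node { struct list_node *prev, *next; };

static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;

static void list_insert(struct list_node *n, struct list_node *after)
{
	n->next = after->next;
	n->prev = after;
	after->next->prev = n;
	after->next = n;
}

static void list_remove(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* Scan 'head' under scan_lock, pausing every 'batch' elements. */
static void scan_with_pauses(struct list_node *head, int batch,
			     void (*process)(struct list_node *))
{
	struct list_node cursor;         /* bare placeholder, ~ 'tmp' */
	struct list_node *n, *next;
	int count = 0;

	pthread_mutex_lock(&scan_lock);
	for (n = head->next; n != head; n = next) {
		next = n->next;
		process(n);
		if (++count % batch)
			continue;
		list_insert(&cursor, n); /* keep our place in the list */
		pthread_mutex_unlock(&scan_lock);
		sched_yield();           /* let writers run */
		pthread_mutex_lock(&scan_lock);
		next = cursor.next;      /* resume after the placeholder */
		list_remove(&cursor);
	}
	pthread_mutex_unlock(&scan_lock);
}

As in the patch, elements may be added or removed while the lock is
dropped; because the cursor itself is linked into the list, resuming from
cursor.next always continues from a valid position.
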
diff --git a/queue-6.6/series b/queue-6.6/series
index ada523c485def5572ff1a31e0eded918307fb2ec..3cba481f31f439392e53b9044c902a067fa373de 100644 (file)
--- a/queue-6.6/series
@@ -24,3 +24,6 @@ s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch
 bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch
 tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch
 proc-fix-type-confusion-in-pde_set_flags.patch
+rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
+rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
+rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch