From: Greg Kroah-Hartman
Date: Sat, 13 Sep 2025 11:48:35 +0000 (+0200)
Subject: 6.6-stable patches
X-Git-Tag: v6.1.153~72
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8e6f03f4f0b104bbde2f2ba5d62c6cb0dd652f5f;p=thirdparty%2Fkernel%2Fstable-queue.git

6.6-stable patches

added patches:
        rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
        rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
        rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch
---

diff --git a/queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch b/queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
new file mode 100644
index 0000000000..e5a3550566
--- /dev/null
+++ b/queue-6.6/rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
@@ -0,0 +1,157 @@
+From 1612160b91272f5b1596f499584d6064bf5be794 Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney"
+Date: Fri, 2 Feb 2024 11:49:06 -0800
+Subject: rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks
+
+From: Paul E. McKenney
+
+commit 1612160b91272f5b1596f499584d6064bf5be794 upstream.
+
+Holding a mutex across synchronize_rcu_tasks() and acquiring
+that same mutex in code called from do_exit() after its call to
+exit_tasks_rcu_start() but before its call to exit_tasks_rcu_stop()
+results in deadlock. This is by design, because tasks that are far
+enough into do_exit() are no longer present on the tasks list, making
+it a bit difficult for RCU Tasks to find them, let alone wait on them
+to do a voluntary context switch. However, such deadlocks are becoming
+more frequent. In addition, lockdep currently does not detect such
+deadlocks and they can be difficult to reproduce.
+
+In addition, if a task voluntarily context switches during that time
+(for example, if it blocks acquiring a mutex), then this task is in an
+RCU Tasks quiescent state. And with some adjustments, RCU Tasks could
+just as well take advantage of that fact.
+
+This commit therefore eliminates these deadlock by replacing the
+SRCU-based wait for do_exit() completion with per-CPU lists of tasks
+currently exiting. A given task will be on one of these per-CPU lists for
+the same period of time that this task would previously have been in the
+previous SRCU read-side critical section. These lists enable RCU Tasks
+to find the tasks that have already been removed from the tasks list,
+but that must nevertheless be waited upon.
+
+The RCU Tasks grace period gathers any of these do_exit() tasks that it
+must wait on, and adds them to the list of holdouts. Per-CPU locking
+and get_task_struct() are used to synchronize addition to and removal
+from these lists.
+
+Link: https://lore.kernel.org/all/20240118021842.290665-1-chenzhongjin@huawei.com/
+
+Reported-by: Chen Zhongjin
+Reported-by: Yang Jihong
+Signed-off-by: Paul E. McKenney
+Tested-by: Yang Jihong
+Tested-by: Chen Zhongjin
+Reviewed-by: Frederic Weisbecker
+Signed-off-by: Boqun Feng
+Cc: Tahera Fahimi
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/rcu/tasks.h | 44 ++++++++++++++++++++++++++++----------------
+ 1 file changed, 28 insertions(+), 16 deletions(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -150,8 +150,6 @@ static struct rcu_tasks rt_name =
+ }
+
+ #ifdef CONFIG_TASKS_RCU
+-/* Track exiting tasks in order to allow them to be waited for. */
+-DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
+
+ /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
+ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
+@@ -879,10 +877,12 @@ static void rcu_tasks_wait_gp(struct rcu
+ //        number of voluntary context switches, and add that task to the
+ //        holdout list.
+ // rcu_tasks_postscan():
+-//        Invoke synchronize_srcu() to ensure that all tasks that were
+-//        in the process of exiting (and which thus might not know to
+-//        synchronize with this RCU Tasks grace period) have completed
+-//        exiting.
++//        Gather per-CPU lists of tasks in do_exit() to ensure that all
++//        tasks that were in the process of exiting (and which thus might
++//        not know to synchronize with this RCU Tasks grace period) have
++//        completed exiting. The synchronize_rcu() in rcu_tasks_postgp()
++//        will take care of any tasks stuck in the non-preemptible region
++//        of do_exit() following its call to exit_tasks_rcu_stop().
+ // check_all_holdout_tasks(), repeatedly until holdout list is empty:
+ //        Scans the holdout list, attempting to identify a quiescent state
+ //        for each task on the list. If there is a quiescent state, the
+@@ -895,8 +895,10 @@ static void rcu_tasks_wait_gp(struct rcu
+ //        with interrupts disabled.
+ //
+ // For each exiting task, the exit_tasks_rcu_start() and
+-// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
+-// read-side critical sections waited for by rcu_tasks_postscan().
++// exit_tasks_rcu_finish() functions add and remove, respectively, the
++// current task to a per-CPU list of tasks that rcu_tasks_postscan() must
++// wait on. This is necessary because rcu_tasks_postscan() must wait on
++// tasks that have already been removed from the global list of tasks.
+ //
+ // Pre-grace-period update-side code is ordered before the grace
+ // via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code
+@@ -960,9 +962,13 @@ static void rcu_tasks_pertask(struct tas
+         }
+ }
+
++void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
++DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
++
+ /* Processing between scanning taskslist and draining the holdout list. */
+ static void rcu_tasks_postscan(struct list_head *hop)
+ {
++        int cpu;
+         int rtsi = READ_ONCE(rcu_task_stall_info);
+
+         if (!IS_ENABLED(CONFIG_TINY_RCU)) {
+@@ -976,9 +982,9 @@ static void rcu_tasks_postscan(struct li
+          * this, divide the fragile exit path part in two intersecting
+          * read side critical sections:
+          *
+-         * 1) An _SRCU_ read side starting before calling exit_notify(),
+-         *    which may remove the task from the tasklist, and ending after
+-         *    the final preempt_disable() call in do_exit().
++         * 1) A task_struct list addition before calling exit_notify(),
++         *    which may remove the task from the tasklist, with the
++         *    removal after the final preempt_disable() call in do_exit().
+          *
+          * 2) An _RCU_ read side starting with the final preempt_disable()
+          *    call in do_exit() and ending with the final call to schedule()
+@@ -987,7 +993,17 @@ static void rcu_tasks_postscan(struct li
+          * This handles the part 1). And postgp will handle part 2) with a
+          * call to synchronize_rcu().
+          */
+-        synchronize_srcu(&tasks_rcu_exit_srcu);
++
++        for_each_possible_cpu(cpu) {
++                struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
++                struct task_struct *t;
++
++                raw_spin_lock_irq_rcu_node(rtpcp);
++                list_for_each_entry(t, &rtpcp->rtp_exit_list, rcu_tasks_exit_list)
++                        if (list_empty(&t->rcu_tasks_holdout_list))
++                                rcu_tasks_pertask(t, hop);
++                raw_spin_unlock_irq_rcu_node(rtpcp);
++        }
+
+         if (!IS_ENABLED(CONFIG_TINY_RCU))
+                 del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
+@@ -1055,7 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_
+          *
+          * In addition, this synchronize_rcu() waits for exiting tasks
+          * to complete their final preempt_disable() region of execution,
+-         * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
+          * enforcing the whole region before tasklist removal until
+          * the final schedule() with TASK_DEAD state to be an RCU TASKS
+          * read side critical section.
+@@ -1063,9 +1078,6 @@ static void rcu_tasks_postgp(struct rcu_
+         synchronize_rcu();
+ }
+
+-void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
+-DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
+-
+ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
+ {
+ #ifndef CONFIG_TINY_RCU
diff --git a/queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch b/queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
new file mode 100644
index 0000000000..1fa5c786e2
--- /dev/null
+++ b/queue-6.6/rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
@@ -0,0 +1,94 @@
+From 6b70399f9ef3809f6e308fd99dd78b072c1bd05c Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney"
+Date: Fri, 2 Feb 2024 11:28:45 -0800
+Subject: rcu-tasks: Maintain lists to eliminate RCU-tasks/do_exit() deadlocks
+
+From: Paul E. McKenney
+
+commit 6b70399f9ef3809f6e308fd99dd78b072c1bd05c upstream.
+
+This commit continues the elimination of deadlocks involving do_exit()
+and RCU tasks by causing exit_tasks_rcu_start() to add the current
+task to a per-CPU list and causing exit_tasks_rcu_stop() to remove the
+current task from whatever list it is on. These lists will be used to
+track tasks that are exiting, while still accounting for any RCU-tasks
+quiescent states that these tasks pass though.
+
+[ paulmck: Apply Frederic Weisbecker feedback. ]
+
+Link: https://lore.kernel.org/all/20240118021842.290665-1-chenzhongjin@huawei.com/
+
+Reported-by: Chen Zhongjin
+Reported-by: Yang Jihong
+Signed-off-by: Paul E. McKenney
+Tested-by: Yang Jihong
+Tested-by: Chen Zhongjin
+Reviewed-by: Frederic Weisbecker
+Signed-off-by: Boqun Feng
+Cc: Tahera Fahimi
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/rcu/tasks.h | 43 +++++++++++++++++++++++++++++++++----------
+ 1 file changed, 33 insertions(+), 10 deletions(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1175,25 +1175,48 @@ struct task_struct *get_rcu_tasks_gp_kth
+ EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
+
+ /*
+- * Contribute to protect against tasklist scan blind spot while the
+- * task is exiting and may be removed from the tasklist. See
+- * corresponding synchronize_srcu() for further details.
++ * Protect against tasklist scan blind spot while the task is exiting and
++ * may be removed from the tasklist. Do this by adding the task to yet
++ * another list.
++ *
++ * Note that the task will remove itself from this list, so there is no
++ * need for get_task_struct(), except in the case where rcu_tasks_pertask()
++ * adds it to the holdout list, in which case rcu_tasks_pertask() supplies
++ * the needed get_task_struct().
+  */
+-void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
++void exit_tasks_rcu_start(void)
+ {
+-        current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
++        unsigned long flags;
++        struct rcu_tasks_percpu *rtpcp;
++        struct task_struct *t = current;
++
++        WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
++        preempt_disable();
++        rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
++        t->rcu_tasks_exit_cpu = smp_processor_id();
++        raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
++        if (!rtpcp->rtp_exit_list.next)
++                INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
++        list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
++        raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
++        preempt_enable();
+ }
+
+ /*
+- * Contribute to protect against tasklist scan blind spot while the
+- * task is exiting and may be removed from the tasklist. See
+- * corresponding synchronize_srcu() for further details.
++ * Remove the task from the "yet another list" because do_exit() is now
++ * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
+  */
+-void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
++void exit_tasks_rcu_stop(void)
+ {
++        unsigned long flags;
++        struct rcu_tasks_percpu *rtpcp;
+         struct task_struct *t = current;
+
+-        __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
++        WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
++        rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu);
++        raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
++        list_del_init(&t->rcu_tasks_exit_list);
++        raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ }
+
+ /*
diff --git a/queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch b/queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch
new file mode 100644
index 0000000000..fa933679ec
--- /dev/null
+++ b/queue-6.6/rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch
@@ -0,0 +1,74 @@
+From 0bb11a372fc8d7006b4d0f42a2882939747bdbff Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney"
+Date: Thu, 1 Feb 2024 06:10:26 -0800
+Subject: rcu-tasks: Maintain real-time response in rcu_tasks_postscan()
+
+From: Paul E. McKenney
+
+commit 0bb11a372fc8d7006b4d0f42a2882939747bdbff upstream.
+
+The current code will scan the entirety of each per-CPU list of exiting
+tasks in ->rtp_exit_list with interrupts disabled. This is normally just
+fine, because each CPU typically won't have very many tasks in this state.
+However, if a large number of tasks block late in do_exit(), these lists
+could be arbitrarily long. Low probability, perhaps, but it really
+could happen.
+
+This commit therefore occasionally re-enables interrupts while traversing
+these lists, inserting a dummy element to hold the current place in the
+list. In kernels built with CONFIG_PREEMPT_RT=y, this re-enabling happens
+after each list element is processed, otherwise every one-to-two jiffies.
+
+[ paulmck: Apply Frederic Weisbecker feedback. ]
+
+Link: https://lore.kernel.org/all/ZdeI_-RfdLR8jlsm@localhost.localdomain/
+
+Signed-off-by: Paul E. McKenney
+Cc: Thomas Gleixner
+Cc: Sebastian Siewior
+Cc: Anna-Maria Behnsen
+Cc: Steven Rostedt
+Signed-off-by: Boqun Feng
+Cc: Tahera Fahimi
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/rcu/tasks.h | 22 +++++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -995,13 +995,33 @@ static void rcu_tasks_postscan(struct li
+          */
+
+         for_each_possible_cpu(cpu) {
++                unsigned long j = jiffies + 1;
+                 struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
+                 struct task_struct *t;
++                struct task_struct *t1;
++                struct list_head tmp;
+
+                 raw_spin_lock_irq_rcu_node(rtpcp);
+-                list_for_each_entry(t, &rtpcp->rtp_exit_list, rcu_tasks_exit_list)
++                list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) {
+                         if (list_empty(&t->rcu_tasks_holdout_list))
+                                 rcu_tasks_pertask(t, hop);
++
++                        // RT kernels need frequent pauses, otherwise
++                        // pause at least once per pair of jiffies.
++                        if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j))
++                                continue;
++
++                        // Keep our place in the list while pausing.
++                        // Nothing else traverses this list, so adding a
++                        // bare list_head is OK.
++                        list_add(&tmp, &t->rcu_tasks_exit_list);
++                        raw_spin_unlock_irq_rcu_node(rtpcp);
++                        cond_resched(); // For CONFIG_PREEMPT=n kernels
++                        raw_spin_lock_irq_rcu_node(rtpcp);
++                        t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list);
++                        list_del(&tmp);
++                        j = jiffies + 1;
++                }
+                 raw_spin_unlock_irq_rcu_node(rtpcp);
+         }
+
diff --git a/queue-6.6/series b/queue-6.6/series
index ada523c485..3cba481f31 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -24,3 +24,6 @@ s390-cpum_cf-deny-all-sampling-events-by-counter-pmu.patch
 bpf-tell-memcg-to-use-allow_spinning-false-path-in-b.patch
 tcp_bpf-call-sk_msg_free-when-tcp_bpf_send_verdict-f.patch
 proc-fix-type-confusion-in-pde_set_flags.patch
+rcu-tasks-maintain-lists-to-eliminate-rcu-tasks-do_exit-deadlocks.patch
+rcu-tasks-eliminate-deadlocks-involving-do_exit-and-rcu-tasks.patch
+rcu-tasks-maintain-real-time-response-in-rcu_tasks_postscan.patch
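
The mechanism these three backports introduce is simple to state: an exiting task adds itself to a per-CPU list in exit_tasks_rcu_start(), removes itself again in exit_tasks_rcu_stop(), and rcu_tasks_postscan() walks each per-CPU list under that CPU's lock to pick up holdout tasks. The stand-alone user-space sketch below models only that list discipline and is not kernel code: "buckets" stand in for the per-CPU rcu_tasks_percpu structures, a pthread mutex stands in for the per-CPU raw spinlock, and every identifier in it (bucket, exit_node, scan_exiting, and so on) is invented for the example.

/*
 * Minimal user-space model of the per-CPU exit-list scheme described in the
 * commit messages above. Illustrative only; all names are invented here.
 */
#include <pthread.h>
#include <stdio.h>

#define NBUCKETS 4

struct exit_node {                      /* models task_struct's exit-list linkage */
        const char *name;
        struct exit_node *next, *prev;
};

struct bucket {                         /* models one per-CPU rcu_tasks_percpu */
        pthread_mutex_t lock;
        struct exit_node head;          /* circular list, like struct list_head */
};

static struct bucket buckets[NBUCKETS];

static void bucket_init(struct bucket *b)
{
        pthread_mutex_init(&b->lock, NULL);
        b->head.next = b->head.prev = &b->head;
}

/* Rough analogue of exit_tasks_rcu_start(): an exiting task adds itself. */
static void exit_list_add(struct bucket *b, struct exit_node *n)
{
        pthread_mutex_lock(&b->lock);
        n->next = b->head.next;
        n->prev = &b->head;
        b->head.next->prev = n;
        b->head.next = n;
        pthread_mutex_unlock(&b->lock);
}

/* Rough analogue of exit_tasks_rcu_stop(): the task removes itself again. */
static void exit_list_del(struct bucket *b, struct exit_node *n)
{
        pthread_mutex_lock(&b->lock);
        n->prev->next = n->next;
        n->next->prev = n->prev;
        pthread_mutex_unlock(&b->lock);
}

/* Rough analogue of rcu_tasks_postscan(): walk every bucket under its lock. */
static void scan_exiting(void)
{
        for (int i = 0; i < NBUCKETS; i++) {
                struct bucket *b = &buckets[i];

                pthread_mutex_lock(&b->lock);
                for (struct exit_node *n = b->head.next; n != &b->head; n = n->next)
                        printf("bucket %d: %s is mid-exit, add to holdout list\n",
                               i, n->name);
                pthread_mutex_unlock(&b->lock);
        }
}

int main(void)
{
        struct exit_node me = { .name = "task-A" };

        for (int i = 0; i < NBUCKETS; i++)
                bucket_init(&buckets[i]);

        exit_list_add(&buckets[0], &me);        /* task enters its exit path */
        scan_exiting();                         /* grace period would wait on task-A */
        exit_list_del(&buckets[0], &me);        /* past the last preemptible point */
        scan_exiting();                         /* nothing left to wait on */
        return 0;
}

Built with something like "cc -pthread sketch.c", the first scan reports the registered task as a holdout and the second scan finds every list empty, mirroring the window during which the real grace period must wait on an exiting task.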