git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
cgroup, sched_ext: Include exiting tasks in cgroup iter
author: Tejun Heo <tj@kernel.org>
Tue, 28 Apr 2026 00:16:34 +0000 (14:16 -1000)
committer: Tejun Heo <tj@kernel.org>
Mon, 4 May 2026 19:06:03 +0000 (09:06 -1000)
a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") made
css_task_iter_advance() skip exiting tasks so cgroup.procs stays consistent
with waitpid() visibility. Unfortunately, this broke scx_task_iter.

scx_task_iter walks either scx_tasks (global) or a cgroup subtree via
css_task_iter() and the two modes are expected to cover the same set of
tasks. After the above change the cgroup-scoped mode silently skips tasks
past exit_signals() that are still on scx_tasks.

scx_sub_enable_workfn()'s abort path is one of the symptoms: an exiting
SCX_TASK_SUB_INIT task can race past the cgroup iter, leaking
__scx_init_task() state. Other iterations share the same gap.

Add CSS_TASK_ITER_WITH_DEAD to opt out of the skip and use it from
scx_task_iter().

Fixes: b0e4c2f8a0f0 ("sched_ext: Implement cgroup subtree iteration for scx_task_iter")
Reported-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
include/linux/cgroup.h
kernel/cgroup/cgroup.c
kernel/sched/ext.c

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e52160e85af4b5ba8b3540733aca20a2250fffee..f6d037a30fd899a5a3f7b6f68a2e21be95eff71d 100644 (file)
@@ -53,6 +53,7 @@ struct kernel_clone_args;
 enum css_task_iter_flags {
        CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
        CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
+       CSS_TASK_ITER_WITH_DEAD = (1U << 2),  /* include exiting tasks */
        CSS_TASK_ITER_SKIPPED  = (1U << 16), /* internal flags */
 };
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1f084ee71443e6db5ee87f8213c150dc79290fc2..e51ce4cd37395fea107193f9ab563bc10b46ff52 100644 (file)
@@ -5059,10 +5059,12 @@ repeat:
 
        task = list_entry(it->task_pos, struct task_struct, cg_list);
        /*
-        * Hide tasks that are exiting but not yet removed. Keep zombie
-        * leaders with live threads visible.
+        * Hide tasks that are exiting but not yet removed by default. Keep
+        * zombie leaders with live threads visible. Usages that need to walk
+        * every existing task can opt out via CSS_TASK_ITER_WITH_DEAD.
         */
-       if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
+       if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) &&
+           (task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
                goto repeat;
 
        if (it->flags & CSS_TASK_ITER_PROCS) {
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 9483be03a4ca058a2c1bf4e08a7a4a60bcab6b62..dc5d4787296bc6389c1994717bad46f0e4e13712 100644 (file)
@@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
                lockdep_assert_held(&cgroup_mutex);
                iter->cgrp = cgrp;
                iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self);
-               css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+               css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
+                                   &iter->css_iter);
                return;
        }
 #endif
@@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
                        iter->css_pos = css_next_descendant_pre(iter->css_pos,
                                                                &iter->cgrp->self);
                        if (iter->css_pos)
-                               css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+                               css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
+                                                   &iter->css_iter);
                }
                return NULL;
        }