git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
cgroup: Migrate tasks to the root css when a controller is rebound
author: Tejun Heo <tj@kernel.org>
Mon, 1 Jun 2026 18:56:04 +0000 (08:56 -1000)
committer: Tejun Heo <tj@kernel.org>
Tue, 2 Jun 2026 18:25:29 +0000 (08:25 -1000)
cgroup_apply_control_disable() defers kill_css_finish() while a css is
still populated, relying on css_update_populated() to fire the deferred
kill once the populated count reaches zero.

This deadlocks when a controller is rebound out of a hierarchy. Mounting
an implicit_on_dfl controller such as perf_event as a v1 hierarchy steals
it off the default hierarchy, and rebind_subsystems() kills its
per-cgroup csses while they are still populated. The migration run in the
same step keeps the old css for a controller no longer in the hierarchy's
mask, so no task is migrated off the dying csses. Their populated count
never reaches zero, the deferred kill_css_finish() never fires, and the
next cgroup_lock_and_drain_offline() hangs forever under cgroup_mutex.

That migration is already a no-op pass over the rebound subtree. Add
cgroup_rebind_ss_mask so find_existing_css_set() resolves the leaving
controllers to the root css. Their tasks are migrated there, the
per-cgroup csses depopulate, and cgroup_apply_control_disable() kills
them synchronously. The deferral stays correct for the rmdir and
controller-disable paths it was meant for.

Fixes: 1dffd95575eb ("cgroup: Defer kill_css_finish() in cgroup_apply_control_disable()")
Reported-by: Mark Brown <broonie@kernel.org>
Closes: https://lore.kernel.org/all/41cd159c-54e5-45e0-81df-eaf36a6c028e@sirena.org.uk/
Reported-by: Bert Karwatzki <spasswolf@web.de>
Closes: https://lore.kernel.org/all/4e986b4ed7e16547805d54b6e67d09120bc4d2f2.camel@web.de/
Tested-by: Mark Brown <broonie@kernel.org>
Tested-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cgroup.c

index 6e92791d279edec740df1dd938731bb928e124cd..4d442c224bf5229cfc46e6d20da50a17db79f072 100644 (file)
@@ -197,6 +197,14 @@ static u32 cgrp_dfl_implicit_ss_mask;
 /* some controllers can be threaded on the default hierarchy */
 static u32 cgrp_dfl_threaded_ss_mask;
 
+/*
+ * Set across rebind_subsystems() to the controllers leaving a hierarchy.
+ * Guarded by cgroup_mutex. Makes find_existing_css_set() resolve them to the
+ * root css so the affected tasks are migrated there before
+ * cgroup_apply_control_disable() kills the per-cgroup csses.
+ */
+static u32 cgroup_rebind_ss_mask;
+
 /* The list of hierarchy roots */
 LIST_HEAD(cgroup_roots);
 static int cgroup_root_count;
@@ -1083,7 +1091,15 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
         * won't change, so no need for locking.
         */
        for_each_subsys(ss, i) {
-               if (root->subsys_mask & (1UL << i)) {
+               if (unlikely(cgroup_rebind_ss_mask & (1UL << i))) {
+                       /*
+                        * @ss is leaving this hierarchy and its per-cgroup
+                        * csses are about to be killed. Resolve to the
+                        * surviving root css so the tasks are migrated there.
+                        */
+                       template[i] = cgroup_css(&root->cgrp, ss);
+                       WARN_ON_ONCE(!template[i]);
+               } else if (root->subsys_mask & (1UL << i)) {
                        /*
                         * @ss is in this hierarchy, so we want the
                         * effective css from @cgrp.
@@ -1853,11 +1869,17 @@ int rebind_subsystems(struct cgroup_root *dst_root, u32 ss_mask)
                struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
 
                /*
-                * Controllers from default hierarchy that need to be rebound
-                * are all disabled together in one go.
+                * Controllers leaving the default hierarchy are disabled
+                * together. cgroup_rebind_ss_mask makes cgroup_apply_control()
+                * migrate their tasks to the root css, so the per-cgroup csses
+                * are unpopulated when cgroup_finalize_control() kills them.
+                * Clear it before cgroup_finalize_control(), which does no
+                * css_set lookup.
                 */
                cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+               cgroup_rebind_ss_mask = dfl_disable_ss_mask;
                WARN_ON(cgroup_apply_control(scgrp));
+               cgroup_rebind_ss_mask = 0;
                cgroup_finalize_control(scgrp, 0);
        }
 
@@ -1871,9 +1893,14 @@ int rebind_subsystems(struct cgroup_root *dst_root, u32 ss_mask)
                WARN_ON(!css || cgroup_css(dcgrp, ss));
 
                if (src_root != &cgrp_dfl_root) {
-                       /* disable from the source */
+                       /*
+                        * Disable from the source, migrating its tasks to the
+                        * root css first (see cgroup_rebind_ss_mask).
+                        */
                        src_root->subsys_mask &= ~(1 << ssid);
+                       cgroup_rebind_ss_mask = 1 << ssid;
                        WARN_ON(cgroup_apply_control(scgrp));
+                       cgroup_rebind_ss_mask = 0;
                        cgroup_finalize_control(scgrp, 0);
                }