]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
cpuset: fix overlap of partition effective CPUs
author: Chen Ridong <chenridong@huawei.com>
Thu, 29 Jan 2026 06:45:16 +0000 (06:45 +0000)
committer: Tejun Heo <tj@kernel.org>
Sun, 1 Feb 2026 16:49:52 +0000 (06:49 -1000)
A warning was detected:

 WARNING: kernel/cgroup/cpuset.c:825 at rebuild_sched_domains_locked
 Modules linked in:
 CPU: 12 UID: 0 PID: 681 Comm: rmdir  6.19.0-rc6-next-20260121+
 RIP: 0010:rebuild_sched_domains_locked+0x309/0x4b0
 RSP: 0018:ffffc900019bbd28 EFLAGS: 00000202
 RAX: ffff888104413508 RBX: 0000000000000008 RCX: ffff888104413510
 RDX: ffff888109b5f400 RSI: 000000000000ffcf RDI: 0000000000000001
 RBP: 0000000000000002 R08: ffff888104413508 R09: 0000000000000002
 R10: ffff888104413508 R11: 0000000000000001 R12: ffff888104413500
 R13: 0000000000000002 R14: ffffc900019bbd78 R15: 0000000000000000
 FS:  00007fe274b8d740(0000) GS:ffff8881b6b3c000(0000) knlGS:
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fe274c98b50 CR3: 00000001047a9000 CR4: 00000000000006f0
 Call Trace:
  <TASK>
  update_prstate+0x1c7/0x580
  cpuset_css_killed+0x2f/0x50
  kill_css+0x32/0x180
  cgroup_destroy_locked+0xa7/0x200
  cgroup_rmdir+0x28/0x100
  kernfs_iop_rmdir+0x4c/0x80
  vfs_rmdir+0x12c/0x280
  filename_rmdir+0x19e/0x200
  __x64_sys_rmdir+0x23/0x40
  do_syscall_64+0x6b/0x390

It can be reproduced with the following steps:

  # cd /sys/fs/cgroup/
  # mkdir A1
  # mkdir B1
  # mkdir C1
  # echo 1-3 > A1/cpuset.cpus
  # echo root > A1/cpuset.cpus.partition
  # echo 3-5 > B1/cpuset.cpus
  # echo root > B1/cpuset.cpus.partition
  # echo 6 > C1/cpuset.cpus
  # echo root > C1/cpuset.cpus.partition
  # rmdir A1/
  # rmdir C1/

Both A1 and B1 were initially configured with CPU 3, which was exclusively
assigned to A1's partition. When A1 was removed, CPU 3 was returned to the
root pool. However, B1 incorrectly regained access to CPU 3 when
update_cpumasks_hier was triggered during C1's removal, which also updated
sibling configurations.

The update_sibling_cpumasks function was called to synchronize siblings'
effective CPUs due to changes in their parent's effective CPUs. However,
changes in the parent's effective CPUs should not affect the effective
CPUs of a valid partition.

To fix this issue, update_sibling_cpumasks should invoke
update_cpumasks_hier only for siblings that are not valid partitions.

Fixes: 2a3602030d80 ("cgroup/cpuset: Don't invalidate sibling partitions on cpuset.cpus conflict")
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cpuset.c

index 003232dc9d2e5db43e1663836bd06cab3c56398e..92a51316225d28eefe7a390ae1634dc116efa1bf 100644 (file)
@@ -2222,27 +2222,20 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
         * It is possible a change in parent's effective_cpus
         * due to a change in a child partition's effective_xcpus will impact
         * its siblings even if they do not inherit parent's effective_cpus
-        * directly.
+        * directly. It should not impact valid partition.
         *
         * The update_cpumasks_hier() function may sleep. So we have to
         * release the RCU read lock before calling it.
         */
        rcu_read_lock();
        cpuset_for_each_child(sibling, pos_css, parent) {
-               if (sibling == cs)
+               if (sibling == cs || is_partition_valid(sibling))
                        continue;
-               if (!is_partition_valid(sibling)) {
-                       compute_effective_cpumask(tmp->new_cpus, sibling,
-                                                 parent);
-                       if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus))
-                               continue;
-               } else if (is_remote_partition(sibling)) {
-                       /*
-                        * Change in a sibling cpuset won't affect a remote
-                        * partition root.
-                        */
+
+               compute_effective_cpumask(tmp->new_cpus, sibling,
+                                         parent);
+               if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus))
                        continue;
-               }
 
                if (!css_tryget_online(&sibling->css))
                        continue;