cgroup: Merge branch 'for-6.17-fixes' into for-6.18

author Tejun Heo <tj@kernel.org>

Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)

committer Tejun Heo <tj@kernel.org>

Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)
author Tejun Heo <tj@kernel.org>
Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)
committer Tejun Heo <tj@kernel.org>
Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)
diff --cc Documentation/admin-guide/cgroup-v2.rst
Simple merge
diff --cc kernel/cgroup/cgroup.c

index 99d3b6c0f328c4a088cfe4e917c38872a73e1da4,77d02f87f3f121a5d0bc47e4d9d5c1a8d53c7192..0607c5d092378213f16eddfed5732b81154c128b
--- 1/kernel/cgroup/cgroup.c
--- 2/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@@ -124,10 -124,33 +124,33 @@@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_
   /*
    * cgroup destruction makes heavy use of work items and there can be a lot
    * of concurrent destructions.  Use a separate workqueue so that cgroup
- - * destruction work items don't end up filling up max_active of system_wq
+ + * destruction work items don't end up filling up max_active of system_percpu_wq
    * which may lead to deadlock.
+  *
+  * A cgroup destruction should enqueue work sequentially to:
+  * cgroup_offline_wq: use for css offline work
+  * cgroup_release_wq: use for css release work
+  * cgroup_free_wq: use for free work
+  *
+  * Rationale for using separate workqueues:
+  * The cgroup root free work may depend on completion of other css offline
+  * operations. If all tasks were enqueued to a single workqueue, this could
+  * create a deadlock scenario where:
+  * - Free work waits for other css offline work to complete.
+  * - But other css offline work is queued after free work in the same queue.
+  *
+  * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
+  * 1. umount net_prio
+  * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
+  * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
+  * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
+  * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
+  *    which can never complete as it's behind in the same queue and
+  *    workqueue's max_active is 1.
    */
- static struct workqueue_struct *cgroup_destroy_wq;
+ static struct workqueue_struct *cgroup_offline_wq;
+ static struct workqueue_struct *cgroup_release_wq;
+ static struct workqueue_struct *cgroup_free_wq;
   
   /* generate an array of cgroup subsystem pointers */
   #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@@ -6346,8 -6349,14 +6370,14 @@@ static int __init cgroup_wq_init(void
          * We would prefer to do this in cgroup_init() above, but that
          * is called before init_workqueues(): so leave this until after.
          */
-       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", WQ_PERCPU, 1);
-       BUG_ON(!cgroup_destroy_wq);
- -      cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
++      cgroup_offline_wq = alloc_workqueue("cgroup_offline", WQ_PERCPU, 1);
+       BUG_ON(!cgroup_offline_wq);
+ 
- -      cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
++      cgroup_release_wq = alloc_workqueue("cgroup_release", WQ_PERCPU, 1);
+       BUG_ON(!cgroup_release_wq);
+ 
- -      cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
++      cgroup_free_wq = alloc_workqueue("cgroup_free", WQ_PERCPU, 1);
+       BUG_ON(!cgroup_free_wq);
         return 0;
   }
   core_initcall(cgroup_wq_init);
author	Tejun Heo <tj@kernel.org>
	Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)
committer	Tejun Heo <tj@kernel.org>
	Fri, 5 Sep 2025 17:08:26 +0000 (07:08 -1000)
		1	2
Documentation/admin-guide/cgroup-v2.rst	patch | diff1 | diff2 | blob | history
kernel/cgroup/cgroup.c	patch | diff1 | diff2 | blob | history