From: Tejun Heo <tj@kernel.org>
Date: Fri, 5 Sep 2025 17:08:26 +0000 (-1000)
Subject: cgroup: Merge branch 'for-6.17-fixes' into for-6.18
X-Git-Tag: v6.18-rc1~198^2~23
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4a3e62dfa7b79dc8f759219fe64318ff08e98013;p=thirdparty%2Flinux.git

cgroup: Merge branch 'for-6.17-fixes' into for-6.18

Pull for-6.17-fixes to receive 79f919a89c9d ("cgroup: split
cgroup_destroy_wq into 3 workqueues") to resolve its conflict with
7fa33aa3b001 ("cgroup: WQ_PERCPU added to alloc_workqueue users"). The
latter adds WQ_PERCPU when creating cgroup_destroy_wq and the former splits
the workqueue into three. Resolve by applying WQ_PERCPU to the three split
workqueues.

Signed-off-by: Tejun Heo <tj@kernel.org>
---

4a3e62dfa7b79dc8f759219fe64318ff08e98013
diff --cc kernel/cgroup/cgroup.c
index 99d3b6c0f328c,77d02f87f3f12..0607c5d092378
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@@ -124,10 -124,33 +124,33 @@@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_
  /*
   * cgroup destruction makes heavy use of work items and there can be a lot
   * of concurrent destructions.  Use a separate workqueue so that cgroup
 - * destruction work items don't end up filling up max_active of system_wq
 + * destruction work items don't end up filling up max_active of system_percpu_wq
   * which may lead to deadlock.
+  *
+  * A cgroup destruction should enqueue work sequentially to:
+  * cgroup_offline_wq: used for css offline work
+  * cgroup_release_wq: used for css release work
+  * cgroup_free_wq: used for free work
+  *
+  * Rationale for using separate workqueues:
+  * The cgroup root free work may depend on completion of other css offline
+  * operations. If all work items were enqueued to a single workqueue, this
+  * could create a deadlock scenario where:
+  * - Free work waits for other css offline work to complete.
+  * - But other css offline work is queued after free work in the same queue.
+  *
+  * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
+  * 1. umount net_prio
+  * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
+  * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
+  * 4. net_prio cgroup_destroy_root() calls cgroup_lock_and_drain_offline().
+  * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
+  *    which can never complete as it's behind in the same queue and
+  *    workqueue's max_active is 1.
   */
- static struct workqueue_struct *cgroup_destroy_wq;
+ static struct workqueue_struct *cgroup_offline_wq;
+ static struct workqueue_struct *cgroup_release_wq;
+ static struct workqueue_struct *cgroup_free_wq;
  
  /* generate an array of cgroup subsystem pointers */
  #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@@ -6346,8 -6349,14 +6370,14 @@@ static int __init cgroup_wq_init(void
  	 * We would prefer to do this in cgroup_init() above, but that
  	 * is called before init_workqueues(): so leave this until after.
  	 */
- 	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", WQ_PERCPU, 1);
- 	BUG_ON(!cgroup_destroy_wq);
 -	cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
++	cgroup_offline_wq = alloc_workqueue("cgroup_offline", WQ_PERCPU, 1);
+ 	BUG_ON(!cgroup_offline_wq);
+ 
 -	cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
++	cgroup_release_wq = alloc_workqueue("cgroup_release", WQ_PERCPU, 1);
+ 	BUG_ON(!cgroup_release_wq);
+ 
 -	cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
++	cgroup_free_wq = alloc_workqueue("cgroup_free", WQ_PERCPU, 1);
+ 	BUG_ON(!cgroup_free_wq);
  	return 0;
  }
  core_initcall(cgroup_wq_init);