]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
sched/isolation: Flush memcg workqueues on cpuset isolated partition change
authorFrederic Weisbecker <frederic@kernel.org>
Thu, 12 Jun 2025 13:36:16 +0000 (15:36 +0200)
committerFrederic Weisbecker <frederic@kernel.org>
Tue, 3 Feb 2026 14:23:34 +0000 (15:23 +0100)
The HK_TYPE_DOMAIN housekeeping cpumask is now modifiable at runtime. In
order to synchronize against memcg workqueue to make sure that no
asynchronous draining is still pending or executing on a newly made
isolated CPU, the housekeeping subsystem must flush the memcg
workqueues.

However the memcg workqueues can't be flushed easily since they are
queued to the main per-CPU workqueue pool.

Solve this by creating a memcg-specific workqueue pool and by providing
and using the appropriate flushing API.

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
Cc: linux-mm@kvack.org
include/linux/memcontrol.h
kernel/sched/isolation.c
kernel/sched/sched.h
mm/memcontrol.c

index 0651865a4564fab9feb5c1d1aa24bd9543826cbe..5b004b95648bae89e4ab7954205f6d786b1b6155 100644 (file)
@@ -1037,6 +1037,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
        return id;
 }
 
+void mem_cgroup_flush_workqueue(void);
+
 extern int mem_cgroup_init(void);
 #else /* CONFIG_MEMCG */
 
@@ -1436,6 +1438,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
        return 0;
 }
 
+static inline void mem_cgroup_flush_workqueue(void) { }
+
 static inline int mem_cgroup_init(void) { return 0; }
 #endif /* CONFIG_MEMCG */
 
index 674a02891b380bbc9b1a048d6c7e24577a9e50f1..f4053ebf4027649f78e7d7b1bace950b27ab11f8 100644 (file)
@@ -144,6 +144,8 @@ int housekeeping_update(struct cpumask *isol_mask)
 
        synchronize_rcu();
 
+       mem_cgroup_flush_workqueue();
+
        kfree(old);
 
        return 0;
index 653e898a996a46064f34b1f2bd4b37e278c3dfb7..65dfa48e54b7874247c1fd3cf14afff2a55c903c 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/lockdep_api.h>
 #include <linux/lockdep.h>
 #include <linux/memblock.h>
+#include <linux/memcontrol.h>
 #include <linux/minmax.h>
 #include <linux/mm.h>
 #include <linux/module.h>
index 2289a0299331809da0d192ad6e1acdec68caa71f..b3ca241bb1d6ec1bf12b81f52f999ff6632ce2ce 100644 (file)
@@ -96,6 +96,8 @@ static bool cgroup_memory_nokmem __ro_after_init;
 /* BPF memory accounting disabled? */
 static bool cgroup_memory_nobpf __ro_after_init;
 
+static struct workqueue_struct *memcg_wq __ro_after_init;
+
 static struct kmem_cache *memcg_cachep;
 static struct kmem_cache *memcg_pn_cachep;
 
@@ -2013,7 +2015,7 @@ static void schedule_drain_work(int cpu, struct work_struct *work)
         */
        guard(rcu)();
        if (!cpu_is_isolated(cpu))
-               schedule_work_on(cpu, work);
+               queue_work_on(cpu, memcg_wq, work);
 }
 
 /*
@@ -5125,6 +5127,11 @@ void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
        refill_stock(memcg, nr_pages);
 }
 
+void mem_cgroup_flush_workqueue(void)
+{
+       flush_workqueue(memcg_wq);
+}
+
 static int __init cgroup_memory(char *s)
 {
        char *token;
@@ -5167,6 +5174,9 @@ int __init mem_cgroup_init(void)
        cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
                                  memcg_hotplug_cpu_dead);
 
+       memcg_wq = alloc_workqueue("memcg", WQ_PERCPU, 0);
+       WARN_ON(!memcg_wq);
+
        for_each_possible_cpu(cpu) {
                INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
                          drain_local_memcg_stock);