git.ipfire.org Git - thirdparty/linux.git/commitdiff
cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
author Frederic Weisbecker <frederic@kernel.org>
Wed, 28 May 2025 16:05:32 +0000 (18:05 +0200)
committer Frederic Weisbecker <frederic@kernel.org>
Tue, 3 Feb 2026 14:23:34 +0000 (15:23 +0100)
Until now, HK_TYPE_DOMAIN only included the boot-defined isolated
CPUs passed through the isolcpus= boot option. Users interested in also
knowing the runtime-defined isolated CPUs set through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...

There are many drawbacks to that approach:

1) Most interested subsystems want to know about all isolated CPUs, not
  just those defined at boot time.

2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
  concurrent cpuset changes.

3) Further cpuset modifications are not propagated to subsystems.

Solve 1) and 2) and centralize all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.

Subsystems can rely on RCU to synchronize against concurrent changes.

The propagation mentioned in 3) will be handled in further patches.

[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
include/linux/sched/isolation.h
kernel/cgroup/cpuset.c
kernel/sched/isolation.c
kernel/sched/sched.h

index c7cf6934489cdd0a1ad586096679fda01b065ac3..d8d9baf445169f8e534d92a4b20382dad411a769 100644 (file)
@@ -9,6 +9,11 @@
 enum hk_type {
        /* Inverse of boot-time isolcpus= argument */
        HK_TYPE_DOMAIN_BOOT,
+       /*
+        * Same as HK_TYPE_DOMAIN_BOOT but also includes the
+        * inverse of cpuset isolated partitions. As such it
+        * is always a subset of HK_TYPE_DOMAIN_BOOT.
+        */
        HK_TYPE_DOMAIN,
        /* Inverse of boot-time isolcpus=managed_irq argument */
        HK_TYPE_MANAGED_IRQ,
@@ -35,6 +40,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
 extern bool housekeeping_enabled(enum hk_type type);
 extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
 extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
+extern int housekeeping_update(struct cpumask *isol_mask);
 extern void __init housekeeping_init(void);
 
 #else
@@ -62,6 +68,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
        return true;
 }
 
+static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
 static inline void housekeeping_init(void) { }
 #endif /* CONFIG_CPU_ISOLATION */
 
index 5e2e3514c22e958053a9cbb45afdbed09e27e7d1..e146e1f34bf97a061206481fb1bea9d486e08706 100644 (file)
@@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
        if (!isolated_cpus_updating)
                return;
 
-       lockdep_assert_cpus_held();
-
        ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
        WARN_ON_ONCE(ret < 0);
 
        ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
        WARN_ON_ONCE(ret < 0);
 
+       ret = housekeeping_update(isolated_cpus);
+       WARN_ON_ONCE(ret < 0);
+
        isolated_cpus_updating = false;
 }
 
index 6f77289c14c3528e1496b402d543c33214d0b9fe..674a02891b380bbc9b1a048d6c7e24577a9e50f1 100644 (file)
@@ -29,18 +29,48 @@ static struct housekeeping housekeeping;
 
 bool housekeeping_enabled(enum hk_type type)
 {
-       return !!(housekeeping.flags & BIT(type));
+       return !!(READ_ONCE(housekeeping.flags) & BIT(type));
 }
 EXPORT_SYMBOL_GPL(housekeeping_enabled);
 
+static bool housekeeping_dereference_check(enum hk_type type)
+{
+       if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
+               /* Cpuset isn't even writable yet? */
+               if (system_state <= SYSTEM_SCHEDULING)
+                       return true;
+
+               /* CPU hotplug write locked, so cpuset partition can't be overwritten */
+               if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
+                       return true;
+
+               /* Cpuset lock held, partitions not writable */
+               if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
+                       return true;
+
+               return false;
+       }
+
+       return true;
+}
+
+static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
+{
+       return rcu_dereference_all_check(housekeeping.cpumasks[type],
+                                        housekeeping_dereference_check(type));
+}
+
 const struct cpumask *housekeeping_cpumask(enum hk_type type)
 {
+       const struct cpumask *mask = NULL;
+
        if (static_branch_unlikely(&housekeeping_overridden)) {
-               if (housekeeping.flags & BIT(type)) {
-                       return rcu_dereference_check(housekeeping.cpumasks[type], 1);
-               }
+               if (READ_ONCE(housekeeping.flags) & BIT(type))
+                       mask = housekeeping_cpumask_dereference(type);
        }
-       return cpu_possible_mask;
+       if (!mask)
+               mask = cpu_possible_mask;
+       return mask;
 }
 EXPORT_SYMBOL_GPL(housekeeping_cpumask);
 
@@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
 
 bool housekeeping_test_cpu(int cpu, enum hk_type type)
 {
-       if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
+       if (static_branch_unlikely(&housekeeping_overridden) &&
+           READ_ONCE(housekeeping.flags) & BIT(type))
                return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
        return true;
 }
 EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
 
+int housekeeping_update(struct cpumask *isol_mask)
+{
+       struct cpumask *trial, *old = NULL;
+
+       lockdep_assert_cpus_held();
+
+       trial = kmalloc(cpumask_size(), GFP_KERNEL);
+       if (!trial)
+               return -ENOMEM;
+
+       cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
+       if (!cpumask_intersects(trial, cpu_online_mask)) {
+               kfree(trial);
+               return -EINVAL;
+       }
+
+       if (!housekeeping.flags)
+               static_branch_enable_cpuslocked(&housekeeping_overridden);
+
+       if (housekeeping.flags & HK_FLAG_DOMAIN)
+               old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
+       else
+               WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
+       rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
+
+       synchronize_rcu();
+
+       kfree(old);
+
+       return 0;
+}
+
 void __init housekeeping_init(void)
 {
        enum hk_type type;
index 475bdab3b8db2c23034f2c2769828c61d01130d3..653e898a996a46064f34b1f2bd4b37e278c3dfb7 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/context_tracking.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
 #include <linux/ctype.h>
 #include <linux/file.h>
 #include <linux/fs_api.h>