sched/mmcid: Provide precomputed maximal value

author Thomas Gleixner <tglx@linutronix.de>

Wed, 19 Nov 2025 17:27:09 +0000 (18:27 +0100)

committer Thomas Gleixner <tglx@linutronix.de>

Tue, 25 Nov 2025 18:45:40 +0000 (19:45 +0100)
author Thomas Gleixner <tglx@linutronix.de>
Wed, 19 Nov 2025 17:27:09 +0000 (18:27 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Tue, 25 Nov 2025 18:45:40 +0000 (19:45 +0100)
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h

index d7e8071b626a4c4b3911d2c56e6418a2ee4911c4..0fab369999b6060564e3c3a02a0c367e9e49cf15 100644 (file)
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -117,14 +117,20 @@ struct mm_cid_pcpu {
  /**
   * struct mm_mm_cid - Storage for per MM CID data
   * @pcpu:              Per CPU storage for CIDs associated to a CPU
+ * @max_cids:          The exclusive maximum CID value for allocation and convergence
   * @nr_cpus_allowed:   The number of CPUs in the per MM allowed CPUs map. The map
   *                     is growth only.
+ * @users:             The number of tasks sharing this MM. Separate from mm::mm_users
+ *                     as that is modified by mmget()/mmput() by other entities which
+ *                     do not actually share the MM.
   * @lock:              Spinlock to protect all fields except @pcpu. It also protects
   *                     the MM cid cpumask and the MM cidmask bitmap.
   */
  struct mm_mm_cid {
         struct mm_cid_pcpu      __percpu *pcpu;
+       unsigned int            max_cids;
         unsigned int            nr_cpus_allowed;
+       unsigned int            users;
         raw_spinlock_t          lock;
  }____cacheline_aligned_in_smp;
  #else /* CONFIG_SCHED_MM_CID */
diff --git a/kernel/fork.c b/kernel/fork.c

index 74bc7c9f1bb34c97d6d5ce9892842be4e04c5336..6c23219e116975657c5f7eb1b164c203c4d86600 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2455,6 +2455,7 @@ bad_fork_cleanup_namespaces:
         exit_task_namespaces(p);
  bad_fork_cleanup_mm:
         if (p->mm) {
+               sched_mm_cid_exit(p);
                 mm_clear_owner(p->mm, p);
                 mmput(p->mm);
         }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 34b6c31eca3a4f243165a2c42e7961ec99ad3144..f9295c42da2290e28f509d7aa57db103f9c9fecc 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4485,7 +4485,6 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
         init_numa_balancing(clone_flags, p);
         p->wake_entry.u_flags = CSD_TYPE_TTWU;
         p->migration_pending = NULL;
-       init_sched_mm_cid(p);
  }
  
  DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -10371,15 +10370,27 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
  
  #ifdef CONFIG_SCHED_MM_CID
  /*
- * When a task exits, the MM CID held by the task is not longer required as
- * the task cannot return to user space.
+ * Update the CID range properties when the constraints change. Invoked via
+ * fork(), exit() and affinity changes.
   */
+static void mm_update_max_cids(struct mm_struct *mm)
+{
+       struct mm_mm_cid *mc = &mm->mm_cid;
+       unsigned int max_cids;
+
+       lockdep_assert_held(&mm->mm_cid.lock);
+
+       /* Calculate the new maximum constraint */
+       max_cids = min(mc->nr_cpus_allowed, mc->users);
+       WRITE_ONCE(mc->max_cids, max_cids);
+}
+
  static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk)
  {
         struct cpumask *mm_allowed;
         unsigned int weight;
  
-       if (!mm)
+       if (!mm || !READ_ONCE(mm->mm_cid.users))
                 return;
  
         /*
@@ -10389,9 +10400,30 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpu
         guard(raw_spinlock)(&mm->mm_cid.lock);
         mm_allowed = mm_cpus_allowed(mm);
         weight = cpumask_weighted_or(mm_allowed, mm_allowed, affmsk);
+       if (weight == mm->mm_cid.nr_cpus_allowed)
+               return;
         WRITE_ONCE(mm->mm_cid.nr_cpus_allowed, weight);
+       mm_update_max_cids(mm);
+}
+
+void sched_mm_cid_fork(struct task_struct *t)
+{
+       struct mm_struct *mm = t->mm;
+
+       WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
+
+       guard(raw_spinlock)(&mm->mm_cid.lock);
+       t->mm_cid.active = 1;
+       mm->mm_cid.users++;
+       /* Preset last_cid for mm_cid_select() */
+       t->mm_cid.last_cid = READ_ONCE(mm->mm_cid.max_cids) - 1;
+       mm_update_max_cids(mm);
  }
  
+/*
+ * When a task exits, the MM CID held by the task is no longer required as
+ * the task cannot return to user space.
+ */
  void sched_mm_cid_exit(struct task_struct *t)
  {
         struct mm_struct *mm = t->mm;
@@ -10399,12 +10431,14 @@ void sched_mm_cid_exit(struct task_struct *t)
         if (!mm || !t->mm_cid.active)
                 return;
  
-       guard(preempt)();
+       guard(raw_spinlock)(&mm->mm_cid.lock);
         t->mm_cid.active = 0;
+       mm->mm_cid.users--;
         if (t->mm_cid.cid != MM_CID_UNSET) {
                 clear_bit(t->mm_cid.cid, mm_cidmask(mm));
                 t->mm_cid.cid = MM_CID_UNSET;
         }
+       mm_update_max_cids(mm);
  }
  
  /* Deactivate MM CID allocation across execve() */
@@ -10416,22 +10450,11 @@ void sched_mm_cid_before_execve(struct task_struct *t)
  /* Reactivate MM CID after successful execve() */
  void sched_mm_cid_after_execve(struct task_struct *t)
  {
-       struct mm_struct *mm = t->mm;
-
-       if (!mm)
-               return;
-
+       sched_mm_cid_fork(t);
         guard(preempt)();
-       t->mm_cid.active = 1;
         mm_cid_select(t);
  }
  
-void sched_mm_cid_fork(struct task_struct *t)
-{
-       WARN_ON_ONCE(!t->mm || t->mm_cid.cid != MM_CID_UNSET);
-       t->mm_cid.active = 1;
-}
-
  void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
  {
         struct mm_cid_pcpu __percpu *pcpu = mm->mm_cid.pcpu;
@@ -10440,7 +10463,9 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
         for_each_possible_cpu(cpu)
                 per_cpu_ptr(pcpu, cpu)->cid = MM_CID_UNSET;
  
+       mm->mm_cid.max_cids = 0;
         mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
+       mm->mm_cid.users = 0;
         raw_spin_lock_init(&mm->mm_cid.lock);
         cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
         bitmap_zero(mm_cidmask(mm), num_possible_cpus());
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 31f2e431db5e382e8231e74bdb432ec83993b206..d539fb269957fdb06395b668823e86c81ee29395 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3571,7 +3571,7 @@ static inline bool mm_cid_get(struct task_struct *t)
         struct mm_struct *mm = t->mm;
         unsigned int max_cids;
  
-       max_cids = min_t(int, READ_ONCE(mm->mm_cid.nr_cpus_allowed), atomic_read(&mm->mm_users));
+       max_cids = READ_ONCE(mm->mm_cid.max_cids);
  
         /* Try to reuse the last CID of this task */
         if (__mm_cid_get(t, t->mm_cid.last_cid, max_cids))
@@ -3614,7 +3614,6 @@ static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *n
  }
  
  #else /* !CONFIG_SCHED_MM_CID: */
-static inline void init_sched_mm_cid(struct task_struct *t) { }
  static inline void mm_cid_select(struct task_struct *t) { }
  static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
  #endif /* !CONFIG_SCHED_MM_CID */
author	Thomas Gleixner <tglx@linutronix.de>
	Wed, 19 Nov 2025 17:27:09 +0000 (18:27 +0100)
committer	Thomas Gleixner <tglx@linutronix.de>
	Tue, 25 Nov 2025 18:45:40 +0000 (19:45 +0100)
include/linux/rseq_types.h		patch \| blob \| blame \| history
kernel/fork.c		patch \| blob \| blame \| history
kernel/sched/core.c		patch \| blob \| blame \| history
kernel/sched/sched.h		patch \| blob \| blame \| history