sched/mmcid: Introduce per task/CPU ownership infrastructure

author Thomas Gleixner <tglx@linutronix.de>

Wed, 19 Nov 2025 17:27:12 +0000 (18:27 +0100)

committer Thomas Gleixner <tglx@linutronix.de>

Tue, 25 Nov 2025 18:45:41 +0000 (19:45 +0100)
author Thomas Gleixner <tglx@linutronix.de>
Wed, 19 Nov 2025 17:27:12 +0000 (18:27 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Tue, 25 Nov 2025 18:45:41 +0000 (19:45 +0100)
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h

index 574aba6fe97c5c11cc8310bd765726ca82db5241..87854effe1ad6c4e160508a4b79d433a072ea682 100644 (file)
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -92,7 +92,9 @@ struct rseq_data { };
  
  #ifdef CONFIG_SCHED_MM_CID
  
-#define MM_CID_UNSET   (~0U)
+#define MM_CID_UNSET   BIT(31)
+#define MM_CID_ONCPU   BIT(30)
+#define MM_CID_TRANSIT BIT(29)
  
  /**
   * struct sched_mm_cid - Storage for per task MM CID data
diff --git a/include/linux/sched.h b/include/linux/sched.h

index c411ae021bc5523c1a3dad53f487a30e49e8c40d..9eec409745f8c8ab6a6308a8d86d906f9a61846e 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2299,16 +2299,16 @@ void sched_mm_cid_before_execve(struct task_struct *t);
  void sched_mm_cid_after_execve(struct task_struct *t);
  void sched_mm_cid_fork(struct task_struct *t);
  void sched_mm_cid_exit(struct task_struct *t);
-static inline int task_mm_cid(struct task_struct *t)
+static __always_inline int task_mm_cid(struct task_struct *t)
  {
-       return t->mm_cid.cid;
+       return t->mm_cid.cid & ~(MM_CID_ONCPU | MM_CID_TRANSIT);
  }
  #else
  static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
  static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
  static inline void sched_mm_cid_fork(struct task_struct *t) { }
  static inline void sched_mm_cid_exit(struct task_struct *t) { }
-static inline int task_mm_cid(struct task_struct *t)
+static __always_inline int task_mm_cid(struct task_struct *t)
  {
         /*
          * Use the processor id as a fall-back when the mm cid feature is
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 01903cf03ab2afa9c5e33805ddce3ad1a1792419..55bb9c9ae32c35983882edf1de2493fb616fd270 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10386,6 +10386,16 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
   *
   * The mm::mm_cid:pcpu per CPU storage is protected by the CPUs runqueue
   * lock.
+ *
+ * CID ownership:
+ *
+ * A CID is either owned by a task (stored in task_struct::mm_cid.cid) or
+ * by a CPU (stored in mm::mm_cid.pcpu::cid). CIDs owned by CPUs have the
+ * MM_CID_ONCPU bit set. During transition from CPU to task ownership mode,
+ * MM_CID_TRANSIT is set on the per task CIDs. When this bit is set the
+ * task needs to drop the CID into the pool when scheduling out.  Both bits
+ * (ONCPU and TRANSIT) are filtered out by task_mm_cid() when the CID is
+ * actually handed over to user space in the RSEQ memory.
   */
  
  /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index d539fb269957fdb06395b668823e86c81ee29395..4b49284504fb0a23af90b2770d91247c6e0d2eeb 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3540,6 +3540,65 @@ extern void sched_dynamic_update(int mode);
  extern const char *preempt_modes[];
  
  #ifdef CONFIG_SCHED_MM_CID
+
+static __always_inline bool cid_on_cpu(unsigned int cid)
+{
+       return cid & MM_CID_ONCPU;
+}
+
+static __always_inline bool cid_in_transit(unsigned int cid)
+{
+       return cid & MM_CID_TRANSIT;
+}
+
+static __always_inline unsigned int cpu_cid_to_cid(unsigned int cid)
+{
+       return cid & ~MM_CID_ONCPU;
+}
+
+static __always_inline unsigned int cid_to_cpu_cid(unsigned int cid)
+{
+       return cid | MM_CID_ONCPU;
+}
+
+static __always_inline unsigned int cid_to_transit_cid(unsigned int cid)
+{
+       return cid | MM_CID_TRANSIT;
+}
+
+static __always_inline unsigned int cid_from_transit_cid(unsigned int cid)
+{
+       return cid & ~MM_CID_TRANSIT;
+}
+
+static __always_inline bool cid_on_task(unsigned int cid)
+{
+       /* True if none of the MM_CID_ONCPU, MM_CID_TRANSIT, MM_CID_UNSET bits is set */
+       return cid < MM_CID_TRANSIT;
+}
+
+static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
+{
+       clear_bit(cid, mm_cidmask(mm));
+}
+
+static __always_inline void mm_unset_cid_on_task(struct task_struct *t)
+{
+       unsigned int cid = t->mm_cid.cid;
+
+       t->mm_cid.cid = MM_CID_UNSET;
+       if (cid_on_task(cid))
+               mm_drop_cid(t->mm, cid);
+}
+
+static __always_inline void mm_drop_cid_on_cpu(struct mm_struct *mm, struct mm_cid_pcpu *pcp)
+{
+       /* Clear the ONCPU bit, but do not set UNSET in the per CPU storage */
+       pcp->cid = cpu_cid_to_cid(pcp->cid);
+       mm_drop_cid(mm, pcp->cid);
+}
+
+/* Active implementation */
  static inline void init_sched_mm_cid(struct task_struct *t)
  {
         struct mm_struct *mm = t->mm;
author	Thomas Gleixner <tglx@linutronix.de>
	Wed, 19 Nov 2025 17:27:12 +0000 (18:27 +0100)
committer	Thomas Gleixner <tglx@linutronix.de>
	Tue, 25 Nov 2025 18:45:41 +0000 (19:45 +0100)
include/linux/rseq_types.h		patch \| blob \| blame \| history
include/linux/sched.h		patch \| blob \| blame \| history
kernel/sched/core.c		patch \| blob \| blame \| history
kernel/sched/sched.h		patch \| blob \| blame \| history