sched: Cleanup the sched_change NOCLOCK usage
author     Peter Zijlstra <peterz@infradead.org>
           Thu, 11 Sep 2025 10:09:19 +0000 (12:09 +0200)
committer  Peter Zijlstra <peterz@infradead.org>
           Thu, 16 Oct 2025 09:13:54 +0000 (11:13 +0200)
Teach the sched_change pattern how to do update_rq_clock(); this
allows for some simplifications / cleanups.

Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
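
The core of the change is in sched_change_begin(): when the caller has not
passed DEQUEUE_NOCLOCK, the guard now calls update_rq_clock() itself and then
sets DEQUEUE_NOCLOCK, so the enqueue done in sched_change_end() will not update
the clock a second time. That is what lets every call site below drop its
explicit update_rq_clock() and its NOCLOCK flag. A minimal stand-alone sketch
of the idea (the types and the change_begin()/change_end() names are simplified
stand-ins for illustration, not the kernel interfaces):

#include <stdio.h>

#define DEQUEUE_SAVE    0x01    /* illustrative values, not the kernel's */
#define DEQUEUE_NOCLOCK 0x02

struct rq { unsigned long clock; };

/* stand-in for the real rq clock update */
static void update_rq_clock(struct rq *rq)
{
        rq->clock++;
}

struct change_ctx {
        struct rq *rq;
        unsigned int flags;
};

static struct change_ctx change_begin(struct rq *rq, unsigned int flags)
{
        /*
         * If the caller did not already update the clock, do it once here
         * and record that fact in the flags so the enqueue performed at
         * change_end() time skips a second update.
         */
        if (!(flags & DEQUEUE_NOCLOCK)) {
                update_rq_clock(rq);
                flags |= DEQUEUE_NOCLOCK;
        }

        return (struct change_ctx){ .rq = rq, .flags = flags };
}

static void change_end(struct change_ctx ctx)
{
        /* ctx.flags already carries NOCLOCK; no further clock update here */
        printf("clock=%lu flags=%#x\n", ctx.rq->clock, ctx.flags);
}

int main(void)
{
        struct rq rq = { .clock = 0 };

        /* the caller no longer updates the clock or passes DEQUEUE_NOCLOCK */
        struct change_ctx ctx = change_begin(&rq, DEQUEUE_SAVE);
        change_end(ctx);

        return 0;
}

Because change_begin() mutates its local copy of the flags, the clock update
stays a detail of the guard and the call sites only name the semantics they
actually care about (SAVE, MOVE, CLASS).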
kernel/sched/core.c
kernel/sched/ext.c
kernel/sched/syscalls.c

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e715147c31b2508ccaa3ae7f2da966337d6b47d3..3d5659f136240ed7f475ea6b0371f7ba1dc69380 100644
@@ -2346,10 +2346,8 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
        if (p->cpus_ptr != &p->cpus_mask)
                return;
 
-       scoped_guard (task_rq_lock, p) {
-               update_rq_clock(scope.rq);
+       scoped_guard (task_rq_lock, p)
                do_set_cpus_allowed(p, &ac);
-       }
 }
 
 void ___migrate_enable(void)
@@ -2666,9 +2664,7 @@ void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx
 static void
 do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
 {
-       u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK;
-
-       scoped_guard (sched_change, p, flags) {
+       scoped_guard (sched_change, p, DEQUEUE_SAVE) {
                p->sched_class->set_cpus_allowed(p, ctx);
                mm_set_cpus_allowed(p->mm, ctx->new_mask);
        }
@@ -2690,10 +2686,8 @@ void set_cpus_allowed_force(struct task_struct *p, const struct cpumask *new_mas
                struct rcu_head rcu;
        };
 
-       scoped_guard (__task_rq_lock, p) {
-               update_rq_clock(scope.rq);
+       scoped_guard (__task_rq_lock, p)
                do_set_cpus_allowed(p, &ac);
-       }
 
        /*
         * Because this is called with p->pi_lock held, it is not possible
@@ -9108,16 +9102,13 @@ static void sched_change_group(struct task_struct *tsk)
  */
 void sched_move_task(struct task_struct *tsk, bool for_autogroup)
 {
-       unsigned int queue_flags =
-               DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+       unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
        bool resched = false;
        struct rq *rq;
 
        CLASS(task_rq_lock, rq_guard)(tsk);
        rq = rq_guard.rq;
 
-       update_rq_clock(rq);
-
        scoped_guard (sched_change, tsk, queue_flags) {
                sched_change_group(tsk);
                if (!for_autogroup)
@@ -10792,16 +10783,14 @@ struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int
 
        lockdep_assert_rq_held(rq);
 
+       if (!(flags & DEQUEUE_NOCLOCK)) {
+               update_rq_clock(rq);
+               flags |= DEQUEUE_NOCLOCK;
+       }
+
        if (flags & DEQUEUE_CLASS) {
-               if (p->sched_class->switching_from) {
-                       /*
-                        * switching_from_fair() assumes CLASS implies NOCLOCK;
-                        * fixing this assumption would mean switching_from()
-                        * would need to be able to change flags.
-                        */
-                       WARN_ON(!(flags & DEQUEUE_NOCLOCK));
+               if (p->sched_class->switching_from)
                        p->sched_class->switching_from(rq, p);
-               }
        }
 
        *ctx = (struct sched_change_ctx){
@@ -10840,7 +10829,7 @@ void sched_change_end(struct sched_change_ctx *ctx)
                p->sched_class->switching_to(rq, p);
 
        if (ctx->queued)
-               enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK);
+               enqueue_task(rq, p, ctx->flags);
        if (ctx->running)
                set_next_task(rq, p);
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ad371b612a16f4f22c4cabb6793a10da01d7adcd..57170423c3d973c601716c1f4c3167304aaf4e40 100644
@@ -4654,7 +4654,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
        percpu_down_write(&scx_fork_rwsem);
        scx_task_iter_start(&sti);
        while ((p = scx_task_iter_next_locked(&sti))) {
-               unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+               unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
                const struct sched_class *old_class = p->sched_class;
                const struct sched_class *new_class =
                        __setscheduler_class(p->policy, p->prio);
@@ -4662,8 +4662,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                if (!tryget_task_struct(p))
                        continue;
 
-               update_rq_clock(task_rq(p));
-
                if (old_class != new_class)
                        queue_flags |= DEQUEUE_CLASS;
 
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 20af5640b0ab49480fe58914b705b0f620343011..8f0f603b530bef3cba3ba54037ad6716f3f50eb1 100644
@@ -64,7 +64,6 @@ static int effective_prio(struct task_struct *p)
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-       struct rq *rq;
        int old_prio;
 
        if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -73,10 +72,7 @@ void set_user_nice(struct task_struct *p, long nice)
         * We have to be careful, if called from sys_setpriority(),
         * the task might be in the middle of scheduling on another CPU.
         */
-       CLASS(task_rq_lock, rq_guard)(p);
-       rq = rq_guard.rq;
-
-       update_rq_clock(rq);
+       guard(task_rq_lock)(p);
 
        /*
         * The RT priorities are set via sched_setscheduler(), but we still
@@ -89,7 +85,7 @@ void set_user_nice(struct task_struct *p, long nice)
                return;
        }
 
-       scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
+       scoped_guard (sched_change, p, DEQUEUE_SAVE) {
                p->static_prio = NICE_TO_PRIO(nice);
                set_load_weight(p, true);
                old_prio = p->prio;