From: Peter Zijlstra
Date: Thu, 11 Sep 2025 10:09:19 +0000 (+0200)
Subject: sched: Cleanup the sched_change NOCLOCK usage
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d4c64207b88a60dd15a38c790bb73c0b6f9a8c40;p=thirdparty%2Flinux.git

sched: Cleanup the sched_change NOCLOCK usage

Teach the sched_change pattern how to do update_rq_clock(); this
allows for some simplifications / cleanups.
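
For example, callers no longer need to open-code the clock update
before entering the change guard; a condensed before/after sketch
(taken from the migrate_disable_switch() hunk below):

	/* before: caller updates the clock; do_set_cpus_allowed() used DEQUEUE_NOCLOCK */
	scoped_guard (task_rq_lock, p) {
		update_rq_clock(scope.rq);
		do_set_cpus_allowed(p, &ac);
	}

	/* after: sched_change_begin() does update_rq_clock() when NOCLOCK is not set */
	scoped_guard (task_rq_lock, p)
		do_set_cpus_allowed(p, &ac);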

Suggested-by: K Prateek Nayak
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Juri Lelli
Acked-by: Tejun Heo
Acked-by: Vincent Guittot
---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e715147c31b25..3d5659f136240 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2346,10 +2346,8 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
 	if (p->cpus_ptr != &p->cpus_mask)
 		return;
 
-	scoped_guard (task_rq_lock, p) {
-		update_rq_clock(scope.rq);
+	scoped_guard (task_rq_lock, p)
 		do_set_cpus_allowed(p, &ac);
-	}
 }
 
 void ___migrate_enable(void)
@@ -2666,9 +2664,7 @@ void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx
 
 static void do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
 {
-	u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK;
-
-	scoped_guard (sched_change, p, flags) {
+	scoped_guard (sched_change, p, DEQUEUE_SAVE) {
 		p->sched_class->set_cpus_allowed(p, ctx);
 		mm_set_cpus_allowed(p->mm, ctx->new_mask);
 	}
@@ -2690,10 +2686,8 @@ void set_cpus_allowed_force(struct task_struct *p, const struct cpumask *new_mas
 		struct rcu_head rcu;
 	};
 
-	scoped_guard (__task_rq_lock, p) {
-		update_rq_clock(scope.rq);
+	scoped_guard (__task_rq_lock, p)
 		do_set_cpus_allowed(p, &ac);
-	}
 
 	/*
 	 * Because this is called with p->pi_lock held, it is not possible
@@ -9108,16 +9102,13 @@ static void sched_change_group(struct task_struct *tsk)
  */
 void sched_move_task(struct task_struct *tsk, bool for_autogroup)
 {
-	unsigned int queue_flags =
-		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
 	bool resched = false;
 	struct rq *rq;
 
 	CLASS(task_rq_lock, rq_guard)(tsk);
 	rq = rq_guard.rq;
 
-	update_rq_clock(rq);
-
 	scoped_guard (sched_change, tsk, queue_flags) {
 		sched_change_group(tsk);
 		if (!for_autogroup)
@@ -10792,16 +10783,14 @@ struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int
 
 	lockdep_assert_rq_held(rq);
 
+	if (!(flags & DEQUEUE_NOCLOCK)) {
+		update_rq_clock(rq);
+		flags |= DEQUEUE_NOCLOCK;
+	}
+
 	if (flags & DEQUEUE_CLASS) {
-		if (p->sched_class->switching_from) {
-			/*
-			 * switching_from_fair() assumes CLASS implies NOCLOCK;
-			 * fixing this assumption would mean switching_from()
-			 * would need to be able to change flags.
-			 */
-			WARN_ON(!(flags & DEQUEUE_NOCLOCK));
+		if (p->sched_class->switching_from)
 			p->sched_class->switching_from(rq, p);
-		}
 	}
 
 	*ctx = (struct sched_change_ctx){
@@ -10840,7 +10829,7 @@ void sched_change_end(struct sched_change_ctx *ctx)
 		p->sched_class->switching_to(rq, p);
 
 	if (ctx->queued)
-		enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK);
+		enqueue_task(rq, p, ctx->flags);
 
 	if (ctx->running)
 		set_next_task(rq, p);
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ad371b612a16f..57170423c3d97 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4654,7 +4654,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	percpu_down_write(&scx_fork_rwsem);
 	scx_task_iter_start(&sti);
 	while ((p = scx_task_iter_next_locked(&sti))) {
-		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
 		const struct sched_class *old_class = p->sched_class;
 		const struct sched_class *new_class =
 			__setscheduler_class(p->policy, p->prio);
@@ -4662,8 +4662,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		if (!tryget_task_struct(p))
 			continue;
 
-		update_rq_clock(task_rq(p));
-
 		if (old_class != new_class)
 			queue_flags |= DEQUEUE_CLASS;
 
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 20af5640b0ab4..8f0f603b530be 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -64,7 +64,6 @@ static int effective_prio(struct task_struct *p)
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-	struct rq *rq;
 	int old_prio;
 
 	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -73,10 +72,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * We have to be careful, if called from sys_setpriority(),
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
-	CLASS(task_rq_lock, rq_guard)(p);
-	rq = rq_guard.rq;
-
-	update_rq_clock(rq);
+	guard(task_rq_lock)(p);
 
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
@@ -89,7 +85,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		return;
 	}
 
-	scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
+	scoped_guard (sched_change, p, DEQUEUE_SAVE) {
 		p->static_prio = NICE_TO_PRIO(nice);
 		set_load_weight(p, true);
 		old_prio = p->prio;