From: Peter Zijlstra
Date: Thu, 15 Jan 2026 08:17:49 +0000 (+0100)
Subject: sched: Audit MOVE vs balance_callbacks
X-Git-Tag: v6.19-rc6~12^2~2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=53439363c0a111f11625982b69c88ee2ce8608ec;p=thirdparty%2Flinux.git

sched: Audit MOVE vs balance_callbacks

The {DE,EN}QUEUE_MOVE flag indicates a task is allowed to change
priority, which means there could be balance callbacks queued.
Therefore audit all MOVE users and make sure they do run balance
callbacks before dropping rq-lock.

Fixes: 6455ad5346c9 ("sched: Move sched_class::prio_changed() into the change pattern")
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Pierre Gondois
Tested-by: Juri Lelli
Link: https://patch.msgid.link/20260114130528.GB831285@noisy.programming.kicks-ass.net
---

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 842a3adaf746..4d925d7ad097 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4950,7 +4950,7 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
 	return __splice_balance_callbacks(rq, true);
 }
 
-static void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
+void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
 {
 	if (rf)
 		rq_unpin_lock(rq, rf);
@@ -9126,6 +9126,8 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup)
 
 	if (resched)
 		resched_curr(rq);
+
+	__balance_callbacks(rq, &rq_guard.rf);
 }
 
 static struct cgroup_subsys_state *
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8f6d8d7f895c..afe28c04d5aa 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -545,6 +545,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter)
 static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
 {
 	if (iter->locked_task) {
+		__balance_callbacks(iter->rq, &iter->rf);
 		task_rq_unlock(iter->rq, iter->locked_task, &iter->rf);
 		iter->locked_task = NULL;
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e885a935b716..93fce4bbff5e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2388,7 +2388,8 @@ extern const u32 sched_prio_to_wmult[40];
  * should preserve as much state as possible.
  *
  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
- *        in the runqueue.
+ *        in the runqueue. IOW the priority is allowed to change. Callers
+ *        must expect to deal with balance callbacks.
  *
  * NOCLOCK - skip the update_rq_clock() (avoids double updates)
  *
@@ -3969,6 +3970,8 @@ extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
+
+extern void __balance_callbacks(struct rq *rq, struct rq_flags *rf);
 extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
 
 /*
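
For reference, the pattern being audited looks roughly like the sketch
below. It is not part of the patch; change_prio_example() is a made-up
helper that condenses what callers such as sched_move_task() do: a
SAVE/RESTORE + MOVE dequeue/enqueue cycle may change the task's
priority and thereby queue balance callbacks on the runqueue, and those
callbacks must be run (here via __balance_callbacks()) before the rq
lock is dropped.

static void change_prio_example(struct rq *rq, struct task_struct *p,
				struct rq_flags *rf, int new_prio)
{
	bool queued = task_on_rq_queued(p);

	update_rq_clock(rq);

	/* MOVE: the task may land at a different spot in the runqueue. */
	if (queued)
		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK);

	p->prio = new_prio;	/* simplified; real callers go through the change pattern */

	if (queued)
		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_MOVE | ENQUEUE_NOCLOCK);

	/*
	 * The priority change may have queued balance callbacks (e.g. RT/DL
	 * push/pull); flush them while still holding the rq lock, as the
	 * patch does for sched_move_task() and __scx_task_iter_rq_unlock().
	 */
	__balance_callbacks(rq, rf);
}

In sched_move_task() the rq_flags come from the lock guard
(&rq_guard.rf); in the sched_ext task iterator they come from iter->rf,
which is why the flush is placed right before task_rq_unlock().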