From abfc01077df66593f128d966fdad1d042facc9ac Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Sep 2025 09:51:06 +0200 Subject: [PATCH] sched: Fix do_set_cpus_allowed() locking All callers of do_set_cpus_allowed() only take p->pi_lock, which is not sufficient to actually change the cpumask. Again, this is mostly ok in these cases, but it results in unnecessarily complicated reasoning. Furthermore, there is no reason whatsoever to not just take all the required locks, so do just that. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juri Lelli Acked-by: Tejun Heo Acked-by: Vincent Guittot --- kernel/kthread.c | 15 +++++---------- kernel/sched/core.c | 21 +++++++-------------- kernel/sched/sched.h | 5 +++++ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 31b072e8d4279..832bd2afecc68 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -593,18 +593,16 @@ EXPORT_SYMBOL(kthread_create_on_node); static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state) { - unsigned long flags; - if (!wait_task_inactive(p, state)) { WARN_ON(1); return; } + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) + do_set_cpus_allowed(p, mask); + /* It's safe because the task is inactive. 
*/ - raw_spin_lock_irqsave(&p->pi_lock, flags); - do_set_cpus_allowed(p, mask); p->flags |= PF_NO_SETAFFINITY; - raw_spin_unlock_irqrestore(&p->pi_lock, flags); } static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state) @@ -857,7 +855,6 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) { struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; - unsigned long flags; int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { @@ -882,10 +879,8 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) list_add_tail(&kthread->hotplug_node, &kthreads_hotplug); kthread_fetch_affinity(kthread, affinity); - /* It's safe because the task is inactive. */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - do_set_cpus_allowed(p, affinity); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) + do_set_cpus_allowed(p, affinity); mutex_unlock(&kthreads_hotplug_lock); out: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f2d16d10516a7..805e65007e62f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2668,18 +2668,14 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) bool queued, running; lockdep_assert_held(&p->pi_lock); + lockdep_assert_rq_held(rq); queued = task_on_rq_queued(p); running = task_current_donor(rq, p); - if (queued) { - /* - * Because __kthread_bind() calls this on blocked tasks without - * holding rq->lock. 
- */ - lockdep_assert_rq_held(rq); + if (queued) dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); - } + if (running) put_prev_task(rq, p); @@ -2708,7 +2704,10 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) struct rcu_head rcu; }; - __do_set_cpus_allowed(p, &ac); + scoped_guard (__task_rq_lock, p) { + update_rq_clock(scope.rq); + __do_set_cpus_allowed(p, &ac); + } /* * Because this is called with p->pi_lock held, it is not possible @@ -3483,12 +3482,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p) } fallthrough; case possible: - /* - * XXX When called from select_task_rq() we only - * hold p->pi_lock and again violate locking order. - * - * More yuck to audit. - */ do_set_cpus_allowed(p, task_cpu_fallback_mask(p)); state = fail; break; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index bcde43deb8e9f..b23ce9c77611e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1847,6 +1847,11 @@ DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct, task_rq_unlock(_T->rq, _T->lock, &_T->rf), struct rq *rq; struct rq_flags rf) +DEFINE_LOCK_GUARD_1(__task_rq_lock, struct task_struct, + _T->rq = __task_rq_lock(_T->lock, &_T->rf), + __task_rq_unlock(_T->rq, &_T->rf), + struct rq *rq; struct rq_flags rf) + static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) __acquires(rq->lock) { -- 2.47.3