]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
sched/fair: Fix newidle vs core-sched
authorAaron Lu <ziqianlu@bytedance.com>
Wed, 3 Jun 2026 09:51:08 +0000 (17:51 +0800)
committerPeter Zijlstra <peterz@infradead.org>
Thu, 11 Jun 2026 11:41:23 +0000 (13:41 +0200)
While testing Prateek's throttle series, I noticed a panic issue when
coresched is enabled and bisected to this patch.

I fed the panic log and this patch to an agent and its analysis looks
correct to me(cpu56 and cpu57 are siblings in a VM):

       cpu57 (holds core-wide lock)

     pick_next_task() [core scheduling]
     for_each_cpu_wrap(i, smt_mask, 57):
       i=57: pick_task(rq_57)
             pick_task_fair(rq_57)
             -> picks task A
       rq_57->core_pick = task A
       // task_rq(A) == rq_57

       i=56: pick_task(rq_56)
             pick_task_fair(rq_56)
             cfs_rq->nr_queued == 0
             goto idle
             sched_balance_newidle(rq_56)
             raw_spin_rq_unlock(rq_56)
             // core-wide lock released
             newidle_balance() pulls
               task A: rq_57 -> rq_56
             // task_rq(A) == rq_56 now
             raw_spin_rq_lock(rq_56)
             // core-wide lock re-acquired
             return > 0
             goto again
             pick_task_fair(rq_56)
             -> picks task A
       rq_56->core_pick = task A

     // first loop done
     // rq_57->core_pick is still task A (set before lock release)
     // but task_rq(A) == rq_56 now
     next = rq_57->core_pick  // = task A

     put_prev_set_next_task(rq_57, prev, task A)
     __set_next_task_fair(rq_57, task A)
     hrtick_start_fair(rq_57, task A)
     WARN_ON_ONCE(task_rq(task A) != rq_57)
     // task_rq(A) == rq_56

IOW: by allowing pick_task_fair() to do newidle_balance and not returning
RETRY_TASK, it can end up selecting the same task on two CPUs. Restore the
previous state by never doing newidle when core scheduling is enabled.

Tested-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: "Aaron Lu" <ziqianlu@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260603095108.GA1684319@bytedance.com
kernel/sched/fair.c

index 1b23e73f48b084f46f37decee85e180dfada7cf3..d78467ec6ee1343050fcc2794dafb38ade3599e5 100644 (file)
@@ -9942,6 +9942,9 @@ again:
        return p;
 
 idle:
+       if (sched_core_enabled(rq))
+               return NULL;
+
        new_tasks = sched_balance_newidle(rq, rf);
        if (new_tasks < 0)
                return RETRY_TASK;