1 From 8fef0a3b17bb258130a4fcbcb5addf94b25e9ec5 Mon Sep 17 00:00:00 2001
2 From: Tejun Heo <tj@kernel.org>
3 Date: Tue, 25 Feb 2025 06:02:23 -1000
4 Subject: sched_ext: Fix pick_task_scx() picking non-queued tasks when it's called without balance()
6 From: Tejun Heo <tj@kernel.org>
8 commit 8fef0a3b17bb258130a4fcbcb5addf94b25e9ec5 upstream.
10 a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called
11 without preceding balance_scx()") added a workaround to handle the cases
12 where pick_task_scx() is called without preceding balance_scx() which is due
13 to a fair class bug where pick_task_fair() may return NULL after a true
14 return from balance_fair().
16 The workaround detects when pick_task_scx() is called without preceding
17 balance_scx() and emulates SCX_RQ_BAL_KEEP and triggers kicking to avoid
18 stalling. Unfortunately, the workaround code was testing whether @prev was
19 on SCX to decide whether to keep the task running. This is incorrect as the
20 task may be on SCX but no longer runnable.
22 This could lead to a non-runnable task being returned from pick_task_scx(),
23 which causes interesting confusion and failures. For example, a common failure mode
24 is the task ending up with (!on_rq && on_cpu) state which can cause
25 potential wakers to busy loop, which can easily lead to deadlocks.
27 Fix it by testing whether @prev has SCX_TASK_QUEUED set. This makes
28 @prev_on_scx only used in one place. Open code the usage and improve the
29 description while at it.
31 Signed-off-by: Tejun Heo <tj@kernel.org>
32 Reported-by: Pat Cody <patcody@meta.com>
33 Fixes: a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called without preceding balance_scx()")
34 Cc: stable@vger.kernel.org # v6.12+
35 Acked-by: Andrea Righi <arighi@nvidia.com>
36 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
38 kernel/sched/ext.c | 11 +++++++----
39 1 file changed, 7 insertions(+), 4 deletions(-)
41 --- a/kernel/sched/ext.c
42 +++ b/kernel/sched/ext.c
43 @@ -3047,7 +3047,6 @@ static struct task_struct *pick_task_scx
45 struct task_struct *prev = rq->curr;
46 struct task_struct *p;
47 - bool prev_on_scx = prev->sched_class == &ext_sched_class;
48 bool keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;
49 bool kick_idle = false;
51 @@ -3067,14 +3066,18 @@ static struct task_struct *pick_task_scx
52 * if pick_task_scx() is called without preceding balance_scx().
54 if (unlikely(rq->scx.flags & SCX_RQ_BAL_PENDING)) {
56 + if (prev->scx.flags & SCX_TASK_QUEUED) {
62 - } else if (unlikely(keep_prev && !prev_on_scx)) {
63 - /* only allowed during transitions */
64 + } else if (unlikely(keep_prev &&
65 + prev->sched_class != &ext_sched_class)) {
67 + * Can happen while enabling as SCX_RQ_BAL_PENDING assertion is
68 + * conditional on scx_enabled() and may have been skipped.
70 WARN_ON_ONCE(scx_ops_enable_state() == SCX_OPS_ENABLED);