git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.18-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 8 Apr 2026 13:23:29 +0000 (15:23 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 8 Apr 2026 13:23:29 +0000 (15:23 +0200)
added patches:
sched_ext-fix-stale-direct-dispatch-state-in-ddsp_dsq_id.patch
sched_ext-refactor-do_enqueue_task-local-and-global-dsq-paths.patch

queue-6.18/sched_ext-fix-stale-direct-dispatch-state-in-ddsp_dsq_id.patch [new file with mode: 0644]
queue-6.18/sched_ext-refactor-do_enqueue_task-local-and-global-dsq-paths.patch [new file with mode: 0644]
queue-6.18/series

diff --git a/queue-6.18/sched_ext-fix-stale-direct-dispatch-state-in-ddsp_dsq_id.patch b/queue-6.18/sched_ext-fix-stale-direct-dispatch-state-in-ddsp_dsq_id.patch
new file mode 100644 (file)
index 0000000..1da40de
--- /dev/null
@@ -0,0 +1,174 @@
+From stable+bounces-233886-greg=kroah.com@vger.kernel.org Wed Apr  8 14:49:54 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed,  8 Apr 2026 08:47:41 -0400
+Subject: sched_ext: Fix stale direct dispatch state in ddsp_dsq_id
+To: stable@vger.kernel.org
+Cc: Andrea Righi <arighi@nvidia.com>, Daniel Hodges <hodgesd@meta.com>, Patrick Somaru <patsomaru@meta.com>, Tejun Heo <tj@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260408124741.1019690-2-sashal@kernel.org>
+
+From: Andrea Righi <arighi@nvidia.com>
+
+[ Upstream commit 7e0ffb72de8aa3b25989c2d980e81b829c577010 ]
+
+@p->scx.ddsp_dsq_id can be left set (non-SCX_DSQ_INVALID) triggering a
+spurious warning in mark_direct_dispatch() when the next wakeup's
+ops.select_cpu() calls scx_bpf_dsq_insert(), such as:
+
+ WARNING: kernel/sched/ext.c:1273 at scx_dsq_insert_commit+0xcd/0x140
+
+The root cause is that ddsp_dsq_id was only cleared in dispatch_enqueue(),
+which is not reached in all paths that consume or cancel a direct dispatch
+verdict.
+
+Fix it by clearing it at the right places:
+
+ - direct_dispatch(): cache the direct dispatch state in local variables
+   and clear it before dispatch_enqueue() on the synchronous path. For
+   the deferred path, the direct dispatch state must remain set until
+   process_ddsp_deferred_locals() consumes it.
+
+ - process_ddsp_deferred_locals(): cache the dispatch state in local
+   variables and clear it before calling dispatch_to_local_dsq(), which
+   may migrate the task to another rq.
+
+ - do_enqueue_task(): clear the dispatch state on the enqueue path
+   (local/global/bypass fallbacks), where the direct dispatch verdict is
+   ignored.
+
+ - dequeue_task_scx(): clear the dispatch state after dispatch_dequeue()
+   to handle both the deferred dispatch cancellation and the holding_cpu
+   race, covering all cases where a pending direct dispatch is
+   cancelled.
+
+ - scx_disable_task(): clear the direct dispatch state when
+   transitioning a task out of the current scheduler. Waking tasks may
+   have had the direct dispatch state set by the outgoing scheduler's
+   ops.select_cpu() and then been queued on a wake_list via
+   ttwu_queue_wakelist(), when SCX_OPS_ALLOW_QUEUED_WAKEUP is set. Such
+   tasks are not on the runqueue and are not iterated by scx_bypass(),
+   so their direct dispatch state won't be cleared. Without this clear,
+   any subsequent SCX scheduler that tries to direct dispatch the task
+   will trigger the WARN_ON_ONCE() in mark_direct_dispatch().
+
+Fixes: 5b26f7b920f7 ("sched_ext: Allow SCX_DSQ_LOCAL_ON for direct dispatches")
+Cc: stable@vger.kernel.org # v6.12+
+Cc: Daniel Hodges <hodgesd@meta.com>
+Cc: Patrick Somaru <patsomaru@meta.com>
+Signed-off-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |   49 +++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 35 insertions(+), 14 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -1026,15 +1026,6 @@ static void dispatch_enqueue(struct scx_
+       p->scx.dsq = dsq;
+       /*
+-       * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
+-       * direct dispatch path, but we clear them here because the direct
+-       * dispatch verdict may be overridden on the enqueue path during e.g.
+-       * bypass.
+-       */
+-      p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
+-      p->scx.ddsp_enq_flags = 0;
+-
+-      /*
+        * We're transitioning out of QUEUEING or DISPATCHING. store_release to
+        * match waiters' load_acquire.
+        */
+@@ -1176,12 +1167,34 @@ static void mark_direct_dispatch(struct
+       p->scx.ddsp_enq_flags = enq_flags;
+ }
++/*
++ * Clear @p direct dispatch state when leaving the scheduler.
++ *
++ * Direct dispatch state must be cleared in the following cases:
++ *  - direct_dispatch(): cleared on the synchronous enqueue path, deferred
++ *    dispatch keeps the state until consumed
++ *  - process_ddsp_deferred_locals(): cleared after consuming deferred state,
++ *  - do_enqueue_task(): cleared on enqueue fallbacks where the dispatch
++ *    verdict is ignored (local/global/bypass)
++ *  - dequeue_task_scx(): cleared after dispatch_dequeue(), covering deferred
++ *    cancellation and holding_cpu races
++ *  - scx_disable_task(): cleared for queued wakeup tasks, which are excluded by
++ *    the scx_bypass() loop, so that stale state is not reused by a subsequent
++ *    scheduler instance
++ */
++static inline void clear_direct_dispatch(struct task_struct *p)
++{
++      p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
++      p->scx.ddsp_enq_flags = 0;
++}
++
+ static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
+                           u64 enq_flags)
+ {
+       struct rq *rq = task_rq(p);
+       struct scx_dispatch_q *dsq =
+               find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
++      u64 ddsp_enq_flags;
+       touch_core_sched_dispatch(rq, p);
+@@ -1222,8 +1235,10 @@ static void direct_dispatch(struct scx_s
+               return;
+       }
+-      dispatch_enqueue(sch, dsq, p,
+-                       p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
++      ddsp_enq_flags = p->scx.ddsp_enq_flags;
++      clear_direct_dispatch(p);
++
++      dispatch_enqueue(sch, dsq, p, ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
+ }
+ static bool scx_rq_online(struct rq *rq)
+@@ -1329,6 +1344,7 @@ enqueue:
+        */
+       touch_core_sched(rq, p);
+       refill_task_slice_dfl(sch, p);
++      clear_direct_dispatch(p);
+       dispatch_enqueue(sch, dsq, p, enq_flags);
+ }
+@@ -1496,6 +1512,7 @@ static bool dequeue_task_scx(struct rq *
+       sub_nr_running(rq, 1);
+       dispatch_dequeue(rq, p);
++      clear_direct_dispatch(p);
+       return true;
+ }
+@@ -2236,13 +2253,15 @@ static void process_ddsp_deferred_locals
+                               struct task_struct, scx.dsq_list.node))) {
+               struct scx_sched *sch = scx_root;
+               struct scx_dispatch_q *dsq;
++              u64 dsq_id = p->scx.ddsp_dsq_id;
++              u64 enq_flags = p->scx.ddsp_enq_flags;
+               list_del_init(&p->scx.dsq_list.node);
++              clear_direct_dispatch(p);
+-              dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
++              dsq = find_dsq_for_dispatch(sch, rq, dsq_id, p);
+               if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
+-                      dispatch_to_local_dsq(sch, rq, dsq, p,
+-                                            p->scx.ddsp_enq_flags);
++                      dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags);
+       }
+ }
+@@ -2881,6 +2900,8 @@ static void scx_disable_task(struct task
+       lockdep_assert_rq_held(rq);
+       WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
++      clear_direct_dispatch(p);
++
+       if (SCX_HAS_OP(sch, disable))
+               SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
+       scx_set_task_state(p, SCX_TASK_READY);
diff --git a/queue-6.18/sched_ext-refactor-do_enqueue_task-local-and-global-dsq-paths.patch b/queue-6.18/sched_ext-refactor-do_enqueue_task-local-and-global-dsq-paths.patch
new file mode 100644 (file)
index 0000000..4402f96
--- /dev/null
@@ -0,0 +1,71 @@
+From stable+bounces-233885-greg=kroah.com@vger.kernel.org Wed Apr  8 14:49:47 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed,  8 Apr 2026 08:47:40 -0400
+Subject: sched_ext: Refactor do_enqueue_task() local and global DSQ paths
+To: stable@vger.kernel.org
+Cc: Tejun Heo <tj@kernel.org>, Andrea Righi <arighi@nvidia.com>, Emil Tsalapatis <emil@etsalapatis.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260408124741.1019690-1-sashal@kernel.org>
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 3546119f18647d7ddbba579737d8a222b430cb1c ]
+
+The local and global DSQ enqueue paths in do_enqueue_task() share the same
+slice refill logic. Factor out the common code into a shared enqueue label.
+This makes adding new enqueue cases easier. No functional changes.
+
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Stable-dep-of: 7e0ffb72de8a ("sched_ext: Fix stale direct dispatch state in ddsp_dsq_id")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |   21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -1243,6 +1243,7 @@ static void do_enqueue_task(struct rq *r
+ {
+       struct scx_sched *sch = scx_root;
+       struct task_struct **ddsp_taskp;
++      struct scx_dispatch_q *dsq;
+       unsigned long qseq;
+       WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED));
+@@ -1310,8 +1311,17 @@ static void do_enqueue_task(struct rq *r
+ direct:
+       direct_dispatch(sch, p, enq_flags);
+       return;
+-
++local_norefill:
++      dispatch_enqueue(sch, &rq->scx.local_dsq, p, enq_flags);
++      return;
+ local:
++      dsq = &rq->scx.local_dsq;
++      goto enqueue;
++global:
++      dsq = find_global_dsq(sch, p);
++      goto enqueue;
++
++enqueue:
+       /*
+        * For task-ordering, slice refill must be treated as implying the end
+        * of the current slice. Otherwise, the longer @p stays on the CPU, the
+@@ -1319,14 +1329,7 @@ local:
+        */
+       touch_core_sched(rq, p);
+       refill_task_slice_dfl(sch, p);
+-local_norefill:
+-      dispatch_enqueue(sch, &rq->scx.local_dsq, p, enq_flags);
+-      return;
+-
+-global:
+-      touch_core_sched(rq, p);        /* see the comment in local: */
+-      refill_task_slice_dfl(sch, p);
+-      dispatch_enqueue(sch, find_global_dsq(sch, p), p, enq_flags);
++      dispatch_enqueue(sch, dsq, p, enq_flags);
+ }
+ static bool task_runnable(const struct task_struct *p)
index f8bce19b62924a535e14ad0e1e121a096794aaa7..d1d0c136a208d8c8c88d5775e967371fe083566f 100644 (file)
@@ -273,3 +273,5 @@ scsi-target-file-use-kzalloc_flex-for-aio_cmd.patch
 scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch
 mm-replace-read_once-with-standard-page-table-accessors.patch
 mm-memory-fix-pmd-pud-checks-in-follow_pfnmap_start.patch
+sched_ext-refactor-do_enqueue_task-local-and-global-dsq-paths.patch
+sched_ext-fix-stale-direct-dispatch-state-in-ddsp_dsq_id.patch