git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched_ext: Decouple kfunc unlocked-context check from kf_mask
author: Tejun Heo <tj@kernel.org>
Fri, 10 Apr 2026 17:54:06 +0000 (07:54 -1000)
committer: Tejun Heo <tj@kernel.org>
Fri, 10 Apr 2026 17:54:06 +0000 (07:54 -1000)
scx_kf_allowed_if_unlocked() uses !current->scx.kf_mask as a proxy for "no
SCX-tracked lock held". kf_mask is removed in a follow-up patch, so its two
callers - select_cpu_from_kfunc() and scx_dsq_move() - need another basis.

Add a new bool scx_rq.in_select_cpu, set across the SCX_CALL_OP_TASK_RET
that invokes ops.select_cpu(), to capture the one case where SCX itself
holds no lock but try_to_wake_up() holds @p's pi_lock. Together with
scx_locked_rq(), it expresses the same accepted-context set.

select_cpu_from_kfunc() needs a runtime test because it has to take
different locking paths depending on context. Open-code as a three-way
branch. The unlocked branch takes raw_spin_lock_irqsave(&p->pi_lock)
directly - pi_lock alone is enough for the fields the kfunc reads, and is
lighter than task_rq_lock().

scx_dsq_move() doesn't really need a runtime test - its accepted contexts
could be enforced at verifier load time. But since the runtime state is
already there and using it keeps the upcoming load-time filter simpler, just
express the rejection condition with the same runtime state:
(scx_locked_rq() || in_select_cpu) && !scx_kf_allowed(sch, SCX_KF_DISPATCH).

scx_kf_allowed_if_unlocked() is deleted with the conversions.

No semantic change.

v2: s/No functional change/No semantic change/ - the unlocked path now acquires
    pi_lock instead of the heavier task_rq_lock() (Andrea Righi).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
kernel/sched/ext.c
kernel/sched/ext_idle.c
kernel/sched/ext_internal.h
kernel/sched/sched.h

index 4d793a56d96527e3712c2a4a97cb20db0a0c1e73..fa266fd7afd55aeab220a6d88a68a7067b065bf2 100644 (file)
@@ -3308,10 +3308,12 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
                WARN_ON_ONCE(*ddsp_taskp);
                *ddsp_taskp = p;
 
+               this_rq()->scx.in_select_cpu = true;
                cpu = SCX_CALL_OP_TASK_RET(sch,
                                           SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
                                           select_cpu, NULL, p, prev_cpu,
                                           wake_flags);
+               this_rq()->scx.in_select_cpu = false;
                p->scx.selected_cpu = cpu;
                *ddsp_taskp = NULL;
                if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
@@ -8144,7 +8146,7 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
        bool in_balance;
        unsigned long flags;
 
-       if (!scx_kf_allowed_if_unlocked() &&
+       if ((scx_locked_rq() || this_rq()->scx.in_select_cpu) &&
            !scx_kf_allowed(sch, SCX_KF_DISPATCH))
                return false;
 
index 8c31fb65477cfea6a4c37bfab07fffafd02949d6..f99ceeba2e56c379f6bc8ef845049cf9aa9821de 100644 (file)
@@ -913,8 +913,8 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
                                 s32 prev_cpu, u64 wake_flags,
                                 const struct cpumask *allowed, u64 flags)
 {
-       struct rq *rq;
-       struct rq_flags rf;
+       unsigned long irq_flags;
+       bool we_locked = false;
        s32 cpu;
 
        if (!ops_cpu_valid(sch, prev_cpu, NULL))
@@ -924,27 +924,22 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
                return -EBUSY;
 
        /*
-        * If called from an unlocked context, acquire the task's rq lock,
-        * so that we can safely access p->cpus_ptr and p->nr_cpus_allowed.
+        * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
+        * lock or @p's pi_lock. Three cases:
         *
-        * Otherwise, allow to use this kfunc only from ops.select_cpu()
-        * and ops.select_enqueue().
+        *  - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock.
+        *  - other rq-locked SCX op: scx_locked_rq() points at the held rq.
+        *  - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
+        *    nothing held, take pi_lock ourselves.
         */
-       if (scx_kf_allowed_if_unlocked()) {
-               rq = task_rq_lock(p, &rf);
-       } else {
-               if (!scx_kf_allowed(sch, SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE))
-                       return -EPERM;
-               rq = scx_locked_rq();
-       }
-
-       /*
-        * Validate locking correctness to access p->cpus_ptr and
-        * p->nr_cpus_allowed: if we're holding an rq lock, we're safe;
-        * otherwise, assert that p->pi_lock is held.
-        */
-       if (!rq)
+       if (this_rq()->scx.in_select_cpu) {
                lockdep_assert_held(&p->pi_lock);
+       } else if (!scx_locked_rq()) {
+               raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
+               we_locked = true;
+       } else if (!scx_kf_allowed(sch, SCX_KF_ENQUEUE)) {
+               return -EPERM;
+       }
 
        /*
         * This may also be called from ops.enqueue(), so we need to handle
@@ -963,8 +958,8 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
                                         allowed ?: p->cpus_ptr, flags);
        }
 
-       if (scx_kf_allowed_if_unlocked())
-               task_rq_unlock(rq, p, &rf);
+       if (we_locked)
+               raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);
 
        return cpu;
 }
index b4f36d8b9c1ddfafb9fbad91ca8f5aebc2af76e0..54da08a223b7cdb78b16fcbe244197e058256c88 100644 (file)
@@ -1372,11 +1372,6 @@ static inline struct rq *scx_locked_rq(void)
        return __this_cpu_read(scx_locked_rq_state);
 }
 
-static inline bool scx_kf_allowed_if_unlocked(void)
-{
-       return !current->scx.kf_mask;
-}
-
 static inline bool scx_bypassing(struct scx_sched *sch, s32 cpu)
 {
        return unlikely(per_cpu_ptr(sch->pcpu, cpu)->flags &
index ae0783e27c1ece4b6670329185255a45bfa45e8e..0b6a177fd597b6b3344c7e15509861bed98eaadd 100644 (file)
@@ -798,6 +798,7 @@ struct scx_rq {
        u64                     extra_enq_flags;        /* see move_task_to_local_dsq() */
        u32                     nr_running;
        u32                     cpuperf_target;         /* [0, SCHED_CAPACITY_SCALE] */
+       bool                    in_select_cpu;
        bool                    cpu_released;
        u32                     flags;
        u32                     nr_immed;               /* ENQ_IMMED tasks on local_dsq */