sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable

author Tejun Heo <tj@kernel.org>

Sat, 31 Aug 2024 08:02:34 +0000 (22:02 -1000)

committer Tejun Heo <tj@kernel.org>

Wed, 4 Sep 2024 20:23:32 +0000 (10:23 -1000)
author Tejun Heo <tj@kernel.org>
Sat, 31 Aug 2024 08:02:34 +0000 (22:02 -1000)
committer Tejun Heo <tj@kernel.org>
Wed, 4 Sep 2024 20:23:32 +0000 (10:23 -1000)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h

index 4df2f90555879d08d9be8fc51c77ba9b5f2e3579..0f2aeb37bbb047335a399326b31bc8df81b75a3a 100644 (file)
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -120,6 +120,11 @@ static inline struct task_struct *get_task_struct(struct task_struct *t)
         return t;
  }
  
+static inline struct task_struct *tryget_task_struct(struct task_struct *t)
+{
+       return refcount_inc_not_zero(&t->usage) ? t : NULL;
+}
+
  extern void __put_task_struct(struct task_struct *t);
  extern void __put_task_struct_rcu_cb(struct rcu_head *rhp);
  
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c

index c04fc6e3ddb4a274f6ab3cd87a7ddda176ff79c4..5fd2bfc01403304e7e7bfa93a851f2b8362588da 100644 (file)
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1240,11 +1240,10 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
   * whether they would like to filter out dead tasks. See scx_task_iter_init()
   * for details.
   */
-static struct task_struct *
-scx_task_iter_next_locked(struct scx_task_iter *iter, bool include_dead)
+static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
  {
         struct task_struct *p;
-retry:
+
         scx_task_iter_rq_unlock(iter);
  
         while ((p = scx_task_iter_next(iter))) {
@@ -1282,16 +1281,6 @@ retry:
         iter->rq = task_rq_lock(p, &iter->rf);
         iter->locked = p;
  
-       /*
-        * If we see %TASK_DEAD, @p already disabled preemption, is about to do
-        * the final __schedule(), won't ever need to be scheduled again and can
-        * thus be safely ignored. If we don't see %TASK_DEAD, @p can't enter
-        * the final __schedle() while we're locking its rq and thus will stay
-        * alive until the rq is unlocked.
-        */
-       if (!include_dead && READ_ONCE(p->__state) == TASK_DEAD)
-               goto retry;
-
         return p;
  }
  
@@ -4001,7 +3990,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
          * The BPF scheduler is going away. All tasks including %TASK_DEAD ones
          * must be switched out and exited synchronously.
          */
-       while ((p = scx_task_iter_next_locked(&sti, true))) {
+       while ((p = scx_task_iter_next_locked(&sti))) {
                 const struct sched_class *old_class = p->sched_class;
                 struct sched_enq_and_set_ctx ctx;
  
@@ -4632,8 +4621,15 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
         spin_lock_irq(&scx_tasks_lock);
  
         scx_task_iter_init(&sti);
-       while ((p = scx_task_iter_next_locked(&sti, false))) {
-               get_task_struct(p);
+       while ((p = scx_task_iter_next_locked(&sti))) {
+               /*
+                * @p may already be dead, have lost all its usage counts and be
+                * waiting for an RCU grace period before being freed. @p can't
+                * be initialized for SCX in such cases and should be ignored.
+                */
+               if (!tryget_task_struct(p))
+                       continue;
+
                 scx_task_iter_rq_unlock(&sti);
                 spin_unlock_irq(&scx_tasks_lock);
  
@@ -4686,7 +4682,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
         WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL));
  
         scx_task_iter_init(&sti);
-       while ((p = scx_task_iter_next_locked(&sti, false))) {
+       while ((p = scx_task_iter_next_locked(&sti))) {
                 const struct sched_class *old_class = p->sched_class;
                 struct sched_enq_and_set_ctx ctx;
author	Tejun Heo <tj@kernel.org>
	Sat, 31 Aug 2024 08:02:34 +0000 (22:02 -1000)
committer	Tejun Heo <tj@kernel.org>
	Wed, 4 Sep 2024 20:23:32 +0000 (10:23 -1000)
include/linux/sched/task.h		patch \| blob \| blame \| history
kernel/sched/ext.c		patch \| blob \| blame \| history