sched: Split DEQUEUE_SLEEP from deactivate_task()
author     Peter Zijlstra <peterz@infradead.org>
           Thu, 23 May 2024 08:48:09 +0000 (10:48 +0200)
committer  Peter Zijlstra <peterz@infradead.org>
           Sat, 17 Aug 2024 09:06:42 +0000 (11:06 +0200)
As a preparation for dequeue_task() failing, and a second code-path
needing to take care of the 'success' path, split out the DEQUEUE_SLEEP
path from deactivate_task().

Many thanks to Libo for spotting and fixing a TASK_ON_RQ_MIGRATING
ordering failure.
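
In outline (a simplified sketch of the resulting shape, not the patch
text itself): deactivate_task() keeps only the migration-style dequeue,
while the new block_task()/__block_task() pair handles the sleep-style
dequeue and only does the blocking bookkeeping when the dequeue
actually succeeds:

	/* Simplified sketch of the split, see the diff below for the real code. */

	void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
	{
		/* migration-style dequeue: task is still considered on a runqueue */
		WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
		dequeue_task(rq, p, flags);
	}

	static void block_task(struct rq *rq, struct task_struct *p, int flags)
	{
		/* sleep-style dequeue: only block the task if the dequeue succeeded */
		if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
			__block_task(rq, p);	/* p->on_rq = 0, nr_uninterruptible/iowait accounting */
	}

In __schedule(), the sleeping-task path then becomes a single
block_task(rq, prev, DEQUEUE_NOCLOCK) call instead of deactivate_task()
plus open-coded nr_uninterruptible and iowait accounting.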

Fixed-by: Libo Chen <libo.chen@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105029.086192709@infradead.org
kernel/sched/core.c
kernel/sched/sched.h

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4f7a4e98f22d69cac813a01150b5b7b75fdd766d..6c595485bcbc8f33f6999d49e396f9b5b3092f32 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2036,12 +2036,23 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
-       WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+       WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
        ASSERT_EXCLUSIVE_WRITER(p->on_rq);
 
+       /*
+        * Code explicitly relies on TASK_ON_RQ_MIGRATING being set *before*
+        * dequeue_task() and cleared *after* enqueue_task().
+        */
+
        dequeue_task(rq, p, flags);
 }
 
+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+       if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+               __block_task(rq, p);
+}
+
 /**
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
@@ -6498,9 +6509,6 @@ static void __sched notrace __schedule(unsigned int sched_mode)
                                !(prev_state & TASK_NOLOAD) &&
                                !(prev_state & TASK_FROZEN);
 
-                       if (prev->sched_contributes_to_load)
-                               rq->nr_uninterruptible++;
-
                        /*
                         * __schedule()                 ttwu()
                         *   prev_state = prev->state;    if (p->on_rq && ...)
@@ -6512,12 +6520,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
                         *
                         * After this, schedule() must not care about p->state any more.
                         */
-                       deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-
-                       if (prev->in_iowait) {
-                               atomic_inc(&rq->nr_iowait);
-                               delayacct_blkio_start();
-                       }
+                       block_task(rq, prev, DEQUEUE_NOCLOCK);
                }
                switch_count = &prev->nvcsw;
        }
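
The comment added to deactivate_task() refers to the migration path:
TASK_ON_RQ_MIGRATING has to bracket the dequeue/enqueue pair so that a
concurrent observer such as a wakeup never sees the task as fully off a
runqueue while it is being moved. A simplified illustration of that
ordering (assumed migration-style sequence; the enqueue half lives in
activate_task() and is not part of this patch):

	/*
	 * Illustration only: the MIGRATING marker is written before the
	 * dequeue and only replaced by TASK_ON_RQ_QUEUED after the enqueue
	 * on the destination runqueue.
	 */
	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);	/* set *before* ...      */
	dequeue_task(src_rq, p, flags);			/* ... dequeue_task()     */
	set_task_cpu(p, new_cpu);
	enqueue_task(dst_rq, p, flags);			/* ... enqueue_task() ... */
	WRITE_ONCE(p->on_rq, TASK_ON_RQ_QUEUED);	/* ... cleared *after*    */
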
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6196f90df93e18cafb02ebd471b358ff5087345e..69ab3b0289c06e588ae487124baa1a731c91a95a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -68,6 +68,7 @@
 #include <linux/wait_api.h>
 #include <linux/wait_bit.h>
 #include <linux/workqueue_api.h>
+#include <linux/delayacct.h>
 
 #include <trace/events/power.h>
 #include <trace/events/sched.h>
@@ -2585,6 +2586,19 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
        sched_update_tick_dependency(rq);
 }
 
+static inline void __block_task(struct rq *rq, struct task_struct *p)
+{
+       WRITE_ONCE(p->on_rq, 0);
+       ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+       if (p->sched_contributes_to_load)
+               rq->nr_uninterruptible++;
+
+       if (p->in_iowait) {
+               atomic_inc(&rq->nr_iowait);
+               delayacct_blkio_start();
+       }
+}
+
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
 extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);