sched/fair: Fix value reported by hot tasks pulled in /proc/schedstat
author    Peter Zijlstra <peterz@infradead.org>
          Fri, 20 Dec 2024 06:32:19 +0000 (06:32 +0000)
committer Peter Zijlstra <peterz@infradead.org>
          Fri, 20 Dec 2024 14:31:16 +0000 (15:31 +0100)
In /proc/schedstat, lb_hot_gained reports the number of hot tasks pulled
during load balance. This value is incremented in can_migrate_task()
if the task is migratable and hot. After incrementing the value, the
load balancer can still decide not to migrate the task, leading to
wrong accounting. Fix this by incrementing the stats when hot tasks
are actually detached. The issue only exists in detach_tasks(), where
we can decide not to migrate a hot task even if it is migratable; in
detach_one_task(), we migrate it unconditionally.

[Swapnil: Handled the case where nr_failed_migrations_hot was not accounted properly and wrote commit log]

Fixes: d31980846f96 ("sched: Move up affinity check to mitigate useless redoing overhead")
Reported-by: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241220063224.17767-2-swapnil.sapkal@amd.com
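
The key idea is to move the accounting from the decision point
(can_migrate_task()) to the commit point (detach_task()). Below is a
toy userspace model of that pattern, not kernel code: hot_gained,
can_migrate() and detach() are illustrative stand-ins, not kernel APIs.

	#include <stdbool.h>
	#include <stdio.h>

	struct task {
		bool hot;		/* cache-hot on its current CPU */
		bool hot_flagged;	/* stands in for p->sched_task_hot */
	};

	static unsigned long hot_gained;	/* stands in for lb_hot_gained */

	static bool can_migrate(struct task *t)
	{
		t->hot_flagged = false;
		if (t->hot)
			t->hot_flagged = true;	/* mark only; count at detach */
		return true;			/* task is migratable */
	}

	static void detach(struct task *t)
	{
		if (t->hot_flagged) {
			t->hot_flagged = false;
			hot_gained++;	/* count only when the pull happens */
		}
	}

	int main(void)
	{
		struct task a = { .hot = true }, b = { .hot = true };

		can_migrate(&a);
		detach(&a);		/* pulled: counted once */

		can_migrate(&b);	/* balancer later decides to skip b, */
					/* so it is never detached or counted */

		printf("hot_gained = %lu\n", hot_gained);	/* 1, not 2 */
		return 0;
	}

When detach_tasks() skips a flagged task instead of pulling it, the new
next: hunk below counts it as nr_failed_migrations_hot.
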
include/linux/sched.h
kernel/sched/fair.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b5916be49f624f13b7566077c4ff6d9f9622286c..8c6a2ed9f80ecb9519b61424f45d70aa2271a6a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -937,6 +937,7 @@ struct task_struct {
        unsigned                        sched_reset_on_fork:1;
        unsigned                        sched_contributes_to_load:1;
        unsigned                        sched_migrated:1;
+       unsigned                        sched_task_hot:1;
 
        /* Force alignment to the next boundary: */
        unsigned                        :0;
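
The new sched_task_hot bit joins the existing run of single-bit fields
ahead of the zero-width ":0" member, so it should pack into spare bits
of the same word rather than growing struct task_struct. A standalone
sketch of that C idiom (hypothetical struct, not kernel code):

	#include <stdio.h>

	struct flags {
		unsigned a:1;
		unsigned b:1;
		unsigned c:1;	/* an extra 1-bit flag fits in the same word */
		unsigned :0;	/* force alignment to the next boundary */
		unsigned after;
	};

	int main(void)
	{
		/* typically 8 on LP64: one word of flags plus "after" */
		printf("sizeof(struct flags) = %zu\n", sizeof(struct flags));
		return 0;
	}
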
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ae8095aa458567c41cc35ac91f8fc2d4df5b536c..8fc6648a0aa8ec26f15d8cce68ad111b2f561746 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9396,6 +9396,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
        int tsk_cache_hot;
 
        lockdep_assert_rq_held(env->src_rq);
+       if (p->sched_task_hot)
+               p->sched_task_hot = 0;
 
        /*
         * We do not migrate tasks that are:
@@ -9472,10 +9474,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
        if (tsk_cache_hot <= 0 ||
            env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-               if (tsk_cache_hot == 1) {
-                       schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-                       schedstat_inc(p->stats.nr_forced_migrations);
-               }
+               if (tsk_cache_hot == 1)
+                       p->sched_task_hot = 1;
                return 1;
        }
 
@@ -9490,6 +9490,12 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 {
        lockdep_assert_rq_held(env->src_rq);
 
+       if (p->sched_task_hot) {
+               p->sched_task_hot = 0;
+               schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+               schedstat_inc(p->stats.nr_forced_migrations);
+       }
+
        deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, env->dst_cpu);
 }
@@ -9650,6 +9656,9 @@ static int detach_tasks(struct lb_env *env)
 
                continue;
 next:
+               if (p->sched_task_hot)
+                       schedstat_inc(p->stats.nr_failed_migrations_hot);
+
                list_move(&p->se.group_node, tasks);
        }