git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched/fair: Reject misfit pulls onto busy SMT siblings on asym-capacity
author: Andrea Righi <arighi@nvidia.com>
Sat, 9 May 2026 18:07:28 +0000 (20:07 +0200)
committer: Peter Zijlstra <peterz@infradead.org>
Tue, 19 May 2026 10:17:38 +0000 (12:17 +0200)
When SD_ASYM_CPUCAPACITY load balancing considers pulling a misfit task,
capacity_of(dst_cpu) can overstate available compute if the SMT sibling is
busy: the core does not deliver its full nominal capacity.

If SMT is active and dst_cpu is not on a fully idle core, skip this
destination so we do not migrate a misfit expecting a capacity upgrade we
cannot actually provide.

Reported-by: Felix Abecassis <fabecassis@nvidia.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://patch.msgid.link/20260509180955.1840064-5-arighi@nvidia.com
kernel/sched/fair.c

index 8854d4d980b05a18b0a66ff327cc013a7af2b658..f69ee5ae2b8c9d420219ee158047e09b61364e3a 100644 (file)
@@ -9625,6 +9625,7 @@ struct lb_env {
 
        int                     dst_cpu;
        struct rq               *dst_rq;
+       bool                    dst_core_idle;
 
        struct cpumask          *dst_grpmask;
        int                     new_dst_cpu;
@@ -10850,10 +10851,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
         * We can use max_capacity here as reduction in capacity on some
         * CPUs in the group should either be possible to resolve
         * internally or be covered by avg_load imbalance (eventually).
+        *
+        * When SMT is active, only pull a misfit to dst_cpu if it is on a
+        * fully idle core; otherwise the effective capacity of the core is
+        * reduced and we may not actually provide more capacity than the
+        * source.
         */
        if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
            (sgs->group_type == group_misfit_task) &&
-           (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
+           (!env->dst_core_idle ||
+            !capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
             sds->local_stat.group_type != group_has_spare))
                return false;
 
@@ -11417,6 +11424,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
        unsigned long sum_util = 0;
        bool sg_overloaded = 0, sg_overutilized = 0;
 
+       env->dst_core_idle = !sched_smt_active() || is_core_idle(env->dst_cpu);
+
        do {
                struct sg_lb_stats *sgs = &tmp_sgs;
                int local_group;