sched/cache: Record per LLC utilization to guide cache aware scheduling decisions

author Chen Yu <yu.c.chen@intel.com>

Wed, 1 Apr 2026 21:52:15 +0000 (14:52 -0700)

committer Peter Zijlstra <peterz@infradead.org>

Thu, 9 Apr 2026 13:49:48 +0000 (15:49 +0200)
author Chen Yu <yu.c.chen@intel.com>
Wed, 1 Apr 2026 21:52:15 +0000 (14:52 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Thu, 9 Apr 2026 13:49:48 +0000 (15:49 +0200)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h

index 36553e14866d0dffdbb57afbfb0e8a7fc80a8e40..159716fa0d3aae8261def11e69d0cbda518baa84 100644 (file)
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -68,6 +68,10 @@ struct sched_domain_shared {
         atomic_t        nr_busy_cpus;
         int             has_idle_cores;
         int             nr_idle_scan;
+#ifdef CONFIG_SCHED_CACHE
+       unsigned long   util_avg;
+       unsigned long   capacity;
+#endif
  };
  
  struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index a55ada22e40c7d67f4c8d6f80a80db8ea8d2e7a9..6647d465b59eee5972d23cdabf1035558de0b00b 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9992,6 +9992,28 @@ static inline int task_is_ineligible_on_dst_cpu(struct task_struct *p, int dest_
         return 0;
  }
  
+#ifdef CONFIG_SCHED_CACHE
+static __maybe_unused bool get_llc_stats(int cpu, unsigned long *util,
+                                        unsigned long *cap)
+{
+       struct sched_domain_shared *sd_share;
+
+       sd_share = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
+       if (!sd_share)
+               return false;   /* sd_llc_shared is NULL for @cpu; *util and *cap are left untouched */
+
+       *util = READ_ONCE(sd_share->util_avg);  /* value last stored by record_sg_llc_stats() */
+       *cap = READ_ONCE(sd_share->capacity);   /* value last stored by record_sg_llc_stats() */
+
+       return true;
+}
+#else /* !CONFIG_SCHED_CACHE */
+static inline bool get_llc_stats(int cpu, unsigned long *util,
+                                unsigned long *cap)
+{
+       return false;   /* stub: no per-LLC stats exist without CONFIG_SCHED_CACHE */
+}
+#endif /* CONFIG_SCHED_CACHE */
  /*
   * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
   */
@@ -10948,6 +10970,53 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
         return check_cpu_capacity(rq, sd);
  }
  
+#ifdef CONFIG_SCHED_CACHE
+/*
+ * Record this scheduler group's utilization and capacity in the
+ * sched_domain_shared of its LLC for later use. These values guide
+ * load balancing on aggregating tasks to an LLC.
+ */
+static void record_sg_llc_stats(struct lb_env *env,
+                               struct sg_lb_stats *sgs,
+                               struct sched_group *group)
+{
+       struct sched_domain_shared *sd_share;
+       int cpu;
+
+       if (!sched_cache_enabled() || env->idle == CPU_NEWLY_IDLE)
+               return;
+
+       /* Only handle the domain whose child is dst_cpu's LLC domain */
+       if (env->sd->child != rcu_dereference_all(per_cpu(sd_llc, env->dst_cpu)))
+               return;
+
+       /*
+        * At this point we know this group spans an LLC domain.
+        * Record the statistics of this group in its corresponding
+        * shared LLC domain.
+        * Note: sd_share cannot be obtained via sd->child->shared,
+        * because the latter refers to the domain that covers the
+        * local group. Instead, sd_share should be located using
+        * the first CPU of the LLC group.
+        */
+       cpu = cpumask_first(sched_group_span(group));
+       sd_share = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
+       if (!sd_share)
+               return;
+
+       if (READ_ONCE(sd_share->util_avg) != sgs->group_util)   /* store only when the value changed */
+               WRITE_ONCE(sd_share->util_avg, sgs->group_util);
+
+       if (unlikely(READ_ONCE(sd_share->capacity) != sgs->group_capacity))     /* marked as the rare case */
+               WRITE_ONCE(sd_share->capacity, sgs->group_capacity);
+}
+#else /* !CONFIG_SCHED_CACHE */
+static inline void record_sg_llc_stats(struct lb_env *env, struct sg_lb_stats *sgs,
+                                      struct sched_group *group)
+{
+}
+#endif /* CONFIG_SCHED_CACHE */
+
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
   * @env: The load balancing environment.
@@ -11035,6 +11104,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
  
         sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
  
+       record_sg_llc_stats(env, sgs, group);
         /* Computing avg_load makes sense only when group is overloaded */
         if (sgs->group_type == group_overloaded)
                 sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
author	Chen Yu <yu.c.chen@intel.com>
	Wed, 1 Apr 2026 21:52:15 +0000 (14:52 -0700)
committer	Peter Zijlstra <peterz@infradead.org>
	Thu, 9 Apr 2026 13:49:48 +0000 (15:49 +0200)
include/linux/sched/topology.h		patch \| blob \| blame \| history
kernel/sched/fair.c		patch \| blob \| blame \| history