sched/cache: Calculate the percpu sd task LLC preference

author Tim Chen <tim.c.chen@linux.intel.com>

Wed, 1 Apr 2026 21:52:21 +0000 (14:52 -0700)

committer Peter Zijlstra <peterz@infradead.org>

Thu, 9 Apr 2026 13:49:49 +0000 (15:49 +0200)
author Tim Chen <tim.c.chen@linux.intel.com>
Wed, 1 Apr 2026 21:52:21 +0000 (14:52 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Thu, 9 Apr 2026 13:49:49 +0000 (15:49 +0200)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index e66da7a6be3e6a893227ca890b532ed044ddcbcc..7d52cf0b85bd8aaf27ed2fdad3813b1c9f65a2ae 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1386,6 +1386,7 @@ static int llc_id(int cpu)
  
  static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
  {
+       struct sched_domain *sd;
         int pref_llc;
  
         pref_llc = p->preferred_llc;
@@ -1394,10 +1395,15 @@ static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
  
         rq->nr_llc_running++;
         rq->nr_pref_llc_running += (pref_llc == task_llc(p));
+
+       sd = rcu_dereference_all(rq->sd);
+       if (sd && (unsigned int)pref_llc < sd->llc_max)
+               sd->llc_counts[pref_llc]++;
  }
  
  static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
  {
+       struct sched_domain *sd;
         int pref_llc;
  
         pref_llc = p->preferred_llc;
@@ -1406,6 +1412,24 @@ static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
  
         rq->nr_llc_running--;
         rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
+
+       sd = rcu_dereference_all(rq->sd);
+       if (sd && (unsigned int)pref_llc < sd->llc_max) {
+               /*
+                * There is a race condition between dequeue
+                * and CPU hotplug. If a CPU hotplug event occurs
+                * after a task has been enqueued on CPUx, all online
+                * CPUs (including CPUx) rebuild their sched_domains
+                * and reset statistics to zero (including sd->llc_counts).
+                * This can cause a temporary undercount, so we have to
+                * check for underflow in sd->llc_counts before decrementing.
+                *
+                * This undercount is temporary and accurate accounting
+                * will resume once the rq has a chance to be idle.
+                */
+               if (sd->llc_counts[pref_llc])
+                       sd->llc_counts[pref_llc]--;
+       }
  }
  
  void mm_init_sched(struct mm_struct *mm,
author	Tim Chen <tim.c.chen@linux.intel.com>
	Wed, 1 Apr 2026 21:52:21 +0000 (14:52 -0700)
committer	Peter Zijlstra <peterz@infradead.org>
	Thu, 9 Apr 2026 13:49:49 +0000 (15:49 +0200)