From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Wed, 1 Apr 2026 21:52:21 +0000 (-0700)
Subject: sched/cache: Calculate the percpu sd task LLC preference
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=82c960aee304bf286552046b66d5b0b3933b2418;p=thirdparty%2Fkernel%2Flinux.git

sched/cache: Calculate the percpu sd task LLC preference

Calculate the number of tasks' LLC preferences for each runqueue.
This statistic is computed during task enqueue and dequeue
operations, and is used by the cache-aware load balancing.

Co-developed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/d15a64436d3acd19c5c53344c5e9d3d0b79b3233.1775065312.git.tim.c.chen@linux.intel.com
---

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e66da7a6be3e6..7d52cf0b85bd8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1386,6 +1386,7 @@ static int llc_id(int cpu)
 
 static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
 {
+	struct sched_domain *sd;
 	int pref_llc;
 
 	pref_llc = p->preferred_llc;
@@ -1394,10 +1395,15 @@ static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
 
 	rq->nr_llc_running++;
 	rq->nr_pref_llc_running += (pref_llc == task_llc(p));
+
+	sd = rcu_dereference_all(rq->sd);
+	if (sd && (unsigned int)pref_llc < sd->llc_max)
+		sd->llc_counts[pref_llc]++;
 }
 
 static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
 {
+	struct sched_domain *sd;
 	int pref_llc;
 
 	pref_llc = p->preferred_llc;
@@ -1406,6 +1412,24 @@ static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
 
 	rq->nr_llc_running--;
 	rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
+
+	sd = rcu_dereference_all(rq->sd);
+	if (sd && (unsigned int)pref_llc < sd->llc_max) {
+		/*
+		 * There is a race condition between dequeue
+		 * and CPU hotplug. After a task has been enqueued
+		 * on CPUx, a CPU hotplug event occurs, and all online
+		 * CPUs (including CPUx) rebuild their sched_domains
+		 * and reset statistics to zero(including sd->llc_counts).
+		 * This can cause temporary undercount and we have to
+		 * check for such underflow in sd->llc_counts.
+		 *
+		 * This undercount is temporary and accurate accounting
+		 * will resume once the rq has a chance to be idle.
+		 */
+		if (sd->llc_counts[pref_llc])
+			sd->llc_counts[pref_llc]--;
+	}
 }
 
 void mm_init_sched(struct mm_struct *mm,