From: Chen Yu Date: Wed, 13 May 2026 20:39:21 +0000 (-0700) Subject: sched/cache: Fix unpaired account_llc_enqueue/dequeue X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=03755348b8e74421f92ffed9da159175a698290b;p=thirdparty%2Fkernel%2Flinux.git sched/cache: Fix unpaired account_llc_enqueue/dequeue There is a race condition: after a task is enqueued on a runqueue, task_llc(p) may change due to CPU hotplug, because the llc_id is dynamically allocated and adjusted at runtime. Therefore, checking task_llc(p) to determine whether the task is being dequeued from its preferred LLC is unreliable and can leave nr_pref_llc_running inconsistent. To fix this problem, record whether p is enqueued on its preferred LLC, in order to pair with account_llc_dequeue() to maintain a consistent nr_pref_llc_running per runqueue. This bug was reported by sashiko, and the solution was once suggested by Prateek. Fixes: 46afe3af7ead ("sched/cache: Track LLC-preferred tasks per runqueue") Suggested-by: K Prateek Nayak Signed-off-by: Chen Yu Co-developed-by: Tim Chen Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/0c8c6a1571d66792a4d2ff0103ba3cc13e059046.1778703694.git.tim.c.chen@linux.intel.com --- diff --git a/include/linux/sched.h b/include/linux/sched.h index 95729670929cd..2c9e8e2edde1a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1410,6 +1410,8 @@ struct task_struct { #ifdef CONFIG_SCHED_CACHE struct callback_head cache_work; int preferred_llc; + /* 1: task was enqueued to its preferred LLC, 0 otherwise */ + int pref_llc_queued; #endif struct rseq_data rseq; diff --git a/init/init_task.c b/init/init_task.c index 5d90db4ff1f8b..3ecd66fbd563b 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -217,6 +217,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #endif #ifdef CONFIG_SCHED_CACHE .preferred_llc = -1, + .pref_llc_queued = 0, #endif #if defined(CONFIG_KASAN_GENERIC) || 
defined(CONFIG_KASAN_SW_TAGS) .kasan_depth = 1, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 087445ea6bc95..96c61ce366c2c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1472,15 +1472,32 @@ static bool invalid_llc_nr(struct mm_struct *mm, struct task_struct *p, static void account_llc_enqueue(struct rq *rq, struct task_struct *p) { + int pref_llc, pref_llc_queued; struct sched_domain *sd; - int pref_llc; pref_llc = p->preferred_llc; if (pref_llc < 0) return; + pref_llc_queued = (pref_llc == task_llc(p)); rq->nr_llc_running++; - rq->nr_pref_llc_running += (pref_llc == task_llc(p)); + rq->nr_pref_llc_running += pref_llc_queued; + + /* + * Record whether p is enqueued on its preferred + * LLC, in order to pair with account_llc_dequeue() + * to maintain a consistent nr_pref_llc_running per + * runqueue. + * This is necessary because a race condition exists: + * after a task is enqueued on a runqueue, task_llc(p) + * may change due to CPU hotplug. Therefore, checking + * task_llc(p) to determine whether the task is being + * dequeued from its preferred LLC is unreliable and + * can cause inconsistent values - checking the + * p->pref_llc_queued in account_llc_dequeue() would + * be reliable. + */ + p->pref_llc_queued = pref_llc_queued; sd = rcu_dereference_all(rq->sd); if (sd && (unsigned int)pref_llc < sd->llc_max) @@ -1497,7 +1514,15 @@ static void account_llc_dequeue(struct rq *rq, struct task_struct *p) return; rq->nr_llc_running--; - rq->nr_pref_llc_running -= (pref_llc == task_llc(p)); + if (p->pref_llc_queued) { + rq->nr_pref_llc_running--; + /* + * Update the status in case + * other logic might query + * this. + */ + p->pref_llc_queued = 0; + } sd = rcu_dereference_all(rq->sd); if (sd && (unsigned int)pref_llc < sd->llc_max) {