From: Sasha Levin Date: Tue, 26 May 2026 11:34:37 +0000 (-0400) Subject: Fixes for all trees X-Git-Tag: v5.10.258~60 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=e10139970d0ee93024714e221ea02f1ddab4cc9f;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for all trees Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch new file mode 100644 index 0000000000..6b3cecccb5 --- /dev/null +++ b/queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch @@ -0,0 +1,42 @@ +From 419a3a65767d3f679f6ca7369fc3d5a0e83832a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 20:46:05 -0400 +Subject: Revert "x86/vdso: Fix output operand size of RDPID" + +This reverts commit 757a9e78a1c5b824d0a2b7de14c3cd8d841dfbee. + +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/segment.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h +index 7865f180eb087..2e7890dd58a47 100644 +--- a/arch/x86/include/asm/segment.h ++++ b/arch/x86/include/asm/segment.h +@@ -243,7 +243,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) + + static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + { +- unsigned long p; ++ unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP +@@ -253,10 +253,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + * + * If RDPID is available, use it. 
+ */ +- alternative_io ("lsl %[seg],%k[p]", +- "rdpid %[p]", ++ alternative_io ("lsl %[seg],%[p]", ++ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, +- [p] "=r" (p), [seg] "r" (__CPUNODE_SEG)); ++ [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); +-- +2.53.0 + diff --git a/queue-6.1/series b/queue-6.1/series index 2d89ee749e..55ebc556d7 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -833,3 +833,4 @@ net-rds-reset-op_nents-when-zerocopy-page-pin-fails.patch io_uring-prevent-opcode-speculation.patch s390-debug-reject-zero-length-input-before-trimming-.patch wifi-mac80211-check-tdls-flag-in-ieee80211_tdls_oper.patch +revert-x86-vdso-fix-output-operand-size-of-rdpid.patch diff --git a/queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch new file mode 100644 index 0000000000..075a253292 --- /dev/null +++ b/queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch @@ -0,0 +1,260 @@ +From b2ecb0aacf48f499ce941e3aff0841f6ff44b069 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Apr 2026 23:08:56 +0900 +Subject: ksmbd: close durable scavenger races against m_fp_list lookups + +From: DaeMyung Kang + +[ Upstream commit bf736184d063da1a552ffeff0481813599a182cc ] + +ksmbd_durable_scavenger() has two related races against any walker +that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode() +(used by ksmbd_vfs_rename) and the share-mode checks in +fs/smb/server/smb_common.c. + +(1) fp->node list-head reuse. Durable-preserved handles can remain +linked on f_ci->m_fp_list after session teardown so share-mode checks +still see them while the handle is reconnectable. The scavenger +collected expired handles by adding fp->node to a local +scavenger_list after removing them from the global durable idr. 
+Because fp->node is the same list_head used by m_fp_list, +list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links +and corrupts both lists. CONFIG_DEBUG_LIST can report this on the +share-mode walk path. + +(2) Refcount race against m_fp_list walkers. The scavenger qualifies +an expired durable handle with atomic_read(&fp->refcount) > 1 and +fp->conn under global_ft.lock, removes fp from global_ft, then drops +global_ft.lock before unlinking fp from m_fp_list and freeing it. +During that gap fp is still linked on m_fp_list with f_state == +FP_INITED. ksmbd_lookup_fd_inode() under m_lock read calls +ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and +takes a live reference; the scavenger then unlinks and frees fp +while the holder owns a reference, leading to UAF on the holder's +subsequent ksmbd_fd_put() and on any field reads performed by a +concurrent share-mode walker that iterates m_fp_list without taking +ksmbd_fp_get() (smb_check_perm_dleases-like paths). + +Fix both: + + * Stop reusing fp->node as a scavenger-private list node. Remove + one expired handle from global_ft under global_ft.lock, take an + explicit transient reference, drop the lock, unlink fp->node + from m_fp_list under f_ci->m_lock, then drop both the durable + lifetime and transient references with atomic_sub_and_test(2, + &fp->refcount). If the scavenger is the last putter the close + runs there; otherwise an in-flight holder that already raced + through the m_fp_list lookup owns the final close via its + ksmbd_fd_put() path. The one-at-a-time disposal can rescan the + durable idr when multiple handles expire in the same pass, but + durable scavenging is a background expiration path and the final + full scan recomputes min_timeout before the next wait. 
+ + * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right + after idr_remove(), so a delayed final close from a holder that + snatched fp does not re-issue idr_remove() on a persistent id + that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have + already handed out to a brand-new durable handle. + + * Bypass the per-conn open_files_count decrement in + __put_fd_final() when fp is detached from any session table + (fp->conn cleared by session_fd_check() at durable preserve -- + paired with the volatile_id clear at unpublish, so checking + fp->conn alone is sufficient). The walker that owns the final + close runs from an unrelated work->conn whose + stats.open_files_count never tracked this durable fp; without + this guard the holder would underflow that unrelated counter. + +The two races are folded into one patch because patch (1) alone +cleans up the corrupted list but leaves a deterministic UAF window +for m_fp_list walkers that the transient-reference and +persistent_id discipline in (2) close; bisecting onto an +intermediate state would land on a UAF that pre-patch chaos merely +made less reproducible. + +Validation: + * CONFIG_DEBUG_LIST coverage for the list_head reuse path. + * KASAN-enabled direct SMB2 durable-handle coverage that exercised + ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode() + returns while durable handles expired under concurrent rename + lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING + reports. 
+ * checkpatch --strict + * make -j$(nproc) M=fs/smb/server + +Fixes: d484d621d40f ("ksmbd: add durable scavenger timer") +Signed-off-by: DaeMyung Kang +Acked-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/server/vfs_cache.c | 104 ++++++++++++++++++++++++++++---------- + 1 file changed, 77 insertions(+), 27 deletions(-) + +diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c +index d29cc1d01bd2c..a8fed467e9b69 100644 +--- a/fs/smb/server/vfs_cache.c ++++ b/fs/smb/server/vfs_cache.c +@@ -325,6 +325,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) + return; + + idr_remove(global_ft.idr, fp->persistent_id); ++ /* ++ * Clear persistent_id so a later __ksmbd_close_fd() that runs from a ++ * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() ++ * walker held the final reference) does not re-issue idr_remove() on ++ * an id that idr_alloc_cyclic() may have already handed out to a new ++ * durable handle. ++ */ ++ fp->persistent_id = KSMBD_NO_FID; + } + + static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) +@@ -417,6 +425,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, + + static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) + { ++ /* ++ * Detached durable fp -- session_fd_check() cleared fp->conn at ++ * preserve, so this fp is no longer tracked by any conn's ++ * stats.open_files_count. This happens when ++ * ksmbd_scavenger_dispose_dh() hands the final close off to an ++ * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn ++ * is unrelated to the conn that originally opened the handle; close ++ * via the NULL-ft path so we do not underflow that unrelated ++ * counter. 
++ */ ++ if (!fp->conn) { ++ __ksmbd_close_fd(NULL, fp); ++ return; ++ } + __ksmbd_close_fd(&work->sess->file_table, fp); + atomic_dec(&work->conn->stats.open_files_count); + } +@@ -788,24 +810,37 @@ static bool ksmbd_durable_scavenger_alive(void) + return true; + } + +-static void ksmbd_scavenger_dispose_dh(struct list_head *head) ++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) + { +- while (!list_empty(head)) { +- struct ksmbd_file *fp; ++ /* ++ * Durable-preserved fp can remain linked on f_ci->m_fp_list for ++ * share-mode checks. Unlink it before final close; fp->node is not ++ * available as a scavenger-private list node because re-adding it to ++ * another list corrupts m_fp_list. ++ */ ++ down_write(&fp->f_ci->m_lock); ++ list_del_init(&fp->node); ++ up_write(&fp->f_ci->m_lock); + +- fp = list_first_entry(head, struct ksmbd_file, node); +- list_del_init(&fp->node); ++ /* ++ * Drop both the durable lifetime reference and the transient reference ++ * taken by the scavenger under global_ft.lock. If a concurrent ++ * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp ++ * before the unlink above, that holder owns the final close via ++ * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is ++ * the last putter and finalises fp here. 
++ */ ++ if (atomic_sub_and_test(2, &fp->refcount)) + __ksmbd_close_fd(NULL, fp); +- } + } + + static int ksmbd_durable_scavenger(void *dummy) + { + struct ksmbd_file *fp = NULL; ++ struct ksmbd_file *expired_fp; + unsigned int id; + unsigned int min_timeout = 1; + bool found_fp_timeout; +- LIST_HEAD(scavenger_list); + unsigned long remaining_jiffies; + + __module_get(THIS_MODULE); +@@ -815,8 +850,6 @@ static int ksmbd_durable_scavenger(void *dummy) + if (try_to_freeze()) + continue; + +- found_fp_timeout = false; +- + remaining_jiffies = wait_event_timeout(dh_wq, + ksmbd_durable_scavenger_alive() == false, + __msecs_to_jiffies(min_timeout)); +@@ -825,23 +858,39 @@ static int ksmbd_durable_scavenger(void *dummy) + else + min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; + +- write_lock(&global_ft.lock); +- idr_for_each_entry(global_ft.idr, fp, id) { +- if (!fp->durable_timeout) +- continue; +- +- if (atomic_read(&fp->refcount) > 1 || +- fp->conn) +- continue; +- +- found_fp_timeout = true; +- if (fp->durable_scavenger_timeout <= +- jiffies_to_msecs(jiffies)) { +- __ksmbd_remove_durable_fd(fp); +- list_add(&fp->node, &scavenger_list); +- } else { ++ do { ++ expired_fp = NULL; ++ found_fp_timeout = false; ++ ++ write_lock(&global_ft.lock); ++ idr_for_each_entry(global_ft.idr, fp, id) { + unsigned long durable_timeout; + ++ if (!fp->durable_timeout) ++ continue; ++ ++ if (atomic_read(&fp->refcount) > 1 || ++ fp->conn) ++ continue; ++ ++ found_fp_timeout = true; ++ if (fp->durable_scavenger_timeout <= ++ jiffies_to_msecs(jiffies)) { ++ __ksmbd_remove_durable_fd(fp); ++ /* ++ * Take a transient reference so fp ++ * cannot be freed by an in-flight ++ * ksmbd_lookup_fd_inode() that found ++ * it through f_ci->m_fp_list while we ++ * drop global_ft.lock and reach the ++ * m_fp_list unlink in ++ * ksmbd_scavenger_dispose_dh(). 
++ */ ++ atomic_inc(&fp->refcount); ++ expired_fp = fp; ++ break; ++ } ++ + durable_timeout = + fp->durable_scavenger_timeout - + jiffies_to_msecs(jiffies); +@@ -849,10 +898,11 @@ static int ksmbd_durable_scavenger(void *dummy) + if (min_timeout > durable_timeout) + min_timeout = durable_timeout; + } +- } +- write_unlock(&global_ft.lock); ++ write_unlock(&global_ft.lock); + +- ksmbd_scavenger_dispose_dh(&scavenger_list); ++ if (expired_fp) ++ ksmbd_scavenger_dispose_dh(expired_fp); ++ } while (expired_fp); + + if (found_fp_timeout == false) + break; +-- +2.53.0 + diff --git a/queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch new file mode 100644 index 0000000000..001fe8f1f2 --- /dev/null +++ b/queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch @@ -0,0 +1,42 @@ +From b33550234deaf50dfd7a45dd61c74481a920c21a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 20:45:41 -0400 +Subject: Revert "x86/vdso: Fix output operand size of RDPID" + +This reverts commit d607e6b349b014df1d2d0399f6667322626450e0. + +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/segment.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h +index 00cefbb59fa98..9d6411c659205 100644 +--- a/arch/x86/include/asm/segment.h ++++ b/arch/x86/include/asm/segment.h +@@ -244,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) + + static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + { +- unsigned long p; ++ unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP +@@ -254,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + * + * If RDPID is available, use it. 
+ */ +- alternative_io ("lsl %[seg],%k[p]", +- "rdpid %[p]", ++ alternative_io ("lsl %[seg],%[p]", ++ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, +- [p] "=r" (p), [seg] "r" (__CPUNODE_SEG)); ++ [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch b/queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch new file mode 100644 index 0000000000..3d12227bcf --- /dev/null +++ b/queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch @@ -0,0 +1,53 @@ +From e9c6d98eed1da1a5e7d1c7e416b262bc9a26c0e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 23:11:15 +0200 +Subject: sched/deadline: Always stop dl-server before changing parameters + +From: Juri Lelli + +commit bb4700adc3abec34c0a38b64f66258e4e233fc16 upstream. + +Commit cccb45d7c4295 ("sched/deadline: Less agressive dl_server +handling") reduced dl-server overhead by delaying disabling servers only +after there are no fair task around for a whole period, which means that +deadline entities are not dequeued right away on a server stop event. +However, the delay opens up a window in which a request for changing +server parameters can break per-runqueue running_bw tracking, as +reported by Yuri. + +Close the problematic window by unconditionally calling dl_server_stop() +before applying the new parameters (ensuring deadline entities go +through an actual dequeue). 
+ +Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") +Reported-by: Yuri Andriaccio +Signed-off-by: Juri Lelli +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Valentin Schneider +Link: https://lore.kernel.org/r/20250721-upstream-fix-dlserver-lessaggressive-b4-v1-1-4ebc10c87e40@redhat.com +Signed-off-by: Lukas Beckmann +Signed-off-by: Sasha Levin +--- + kernel/sched/debug.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index 7d14e9fa53ac3..564ea17ae405e 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -378,10 +378,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu + return -EINVAL; + } + +- if (rq->cfs.h_nr_queued) { +- update_rq_clock(rq); +- dl_server_stop(&rq->fair_server); +- } ++ update_rq_clock(rq); ++ dl_server_stop(&rq->fair_server); + + retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); + +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-fix-dl_server-behaviour.patch b/queue-6.12/sched-deadline-fix-dl_server-behaviour.patch new file mode 100644 index 0000000000..99daacd549 --- /dev/null +++ b/queue-6.12/sched-deadline-fix-dl_server-behaviour.patch @@ -0,0 +1,153 @@ +From fa6e456ab9d98aabbc46a269473f281a87b336e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 23:11:17 +0200 +Subject: sched/deadline: Fix dl_server behaviour + +From: Peter Zijlstra + +commit a3a70caf7906708bf9bbc80018752a6b36543808 upstream. + +John reported undesirable behaviour with the dl_server since commit: +cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling"). + +When starving fair tasks on purpose (starting spinning FIFO tasks), +his fair workload, which often goes (briefly) idle, would delay fair +invocations for a second, running one invocation per second was both +unexpected and terribly slow. 
+ +The reason this happens is that when dl_se->server_pick_task() returns +NULL, indicating no runnable tasks, it would yield, pushing any later +jobs out a whole period (1 second). + +Instead simply stop the server. This should restore behaviour in that +a later wakeup (which restarts the server) will be able to continue +running (subject to the CBS wakeup rules). + +Notably, this does not re-introduce the behaviour cccb45d7c4295 set +out to solve, any start/stop cycle is naturally throttled by the timer +period (no active cancel). + +Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") +Reported-by: John Stultz +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: John Stultz +Closes: https://lore.kernel.org/regressions/04657838-46d1-432d-95e1-eb73b930b032@mailbox.org +Signed-off-by: Lukas Beckmann +Signed-off-by: Sasha Levin +--- + include/linux/sched.h | 1 - + kernel/sched/deadline.c | 23 ++--------------------- + kernel/sched/sched.h | 33 +++++++++++++++++++++++++++++++-- + 3 files changed, 33 insertions(+), 24 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 464d281aa2e49..f9ffe42cae171 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -674,7 +674,6 @@ struct sched_dl_entity { + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; +- unsigned int dl_server_idle : 1; + + /* + * Bandwidth enforcement timer. 
Each -deadline task has its +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index 609783d7de290..a6c699e43111d 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1621,10 +1621,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) + void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) + { + /* 0 runtime = fair server disabled */ +- if (dl_se->dl_runtime) { +- dl_se->dl_server_idle = 0; ++ if (dl_se->dl_runtime) + update_curr_dl_se(dl_se->rq, dl_se, delta_exec); +- } + } + + /* +@@ -1868,20 +1866,6 @@ void dl_server_stop(struct sched_dl_entity *dl_se) + dl_se->dl_server_active = 0; + } + +-static bool dl_server_stopped(struct sched_dl_entity *dl_se) +-{ +- if (!dl_se->dl_server_active) +- return true; +- +- if (dl_se->dl_server_idle) { +- dl_server_stop(dl_se); +- return true; +- } +- +- dl_se->dl_server_idle = 1; +- return false; +-} +- + void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, + dl_server_pick_f pick_task) + { +@@ -2637,10 +2621,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq) + if (dl_server(dl_se)) { + p = dl_se->server_pick_task(dl_se); + if (!p) { +- if (!dl_server_stopped(dl_se)) { +- dl_se->dl_yielded = 1; +- update_curr_dl_se(rq, dl_se, 0); +- } ++ dl_server_stop(dl_se); + goto again; + } + rq->dl_server = dl_se; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 9391ff62cdaaa..7956abeb9154e 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -377,10 +377,39 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6 + * dl_server_update() -- called from update_curr_common(), propagates runtime + * to the server. + * +- * dl_server_start() +- * dl_server_stop() -- start/stop the server when it has (no) tasks. ++ * dl_server_start() -- start the server when it has tasks; it will stop ++ * automatically when there are no more tasks, per ++ * dl_se::server_pick() returning NULL. 
++ * ++ * dl_server_stop() -- (force) stop the server; use when updating ++ * parameters. + * + * dl_server_init() -- initializes the server. ++ * ++ * When started the dl_server will (per dl_defer) schedule a timer for its ++ * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a ++ * server will run at the very end of its period. ++ * ++ * This is done such that any runtime from the target class can be accounted ++ * against the server -- through dl_server_update() above -- such that when it ++ * becomes time to run, it might already be out of runtime and get deferred ++ * until the next period. In this case dl_server_timer() will alternate ++ * between defer and replenish but never actually enqueue the server. ++ * ++ * Only when the target class does not manage to exhaust the server's runtime ++ * (there's actually starvation in the given period), will the dl_server get on ++ * the runqueue. Once queued it will pick tasks from the target class and run ++ * them until either its runtime is exhausted, at which point it's back to ++ * dl_server_timer, or until there are no more tasks to run, at which point ++ * the dl_server stops itself. ++ * ++ * By stopping at this point the dl_server retains bandwidth, which, if a new ++ * task wakes up imminently (starting the server again), can be used -- ++ * subject to CBS wakeup rules -- without having to wait for the next period. ++ * ++ * Additionally, because of the dl_defer behaviour the start/stop behaviour is ++ * naturally throttled to once per period, avoiding high context switch ++ * workloads from spamming the hrtimer program/cancel paths. 
+ */ + extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec); + extern void dl_server_start(struct sched_dl_entity *dl_se); +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch b/queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch new file mode 100644 index 0000000000..4b4e855f86 --- /dev/null +++ b/queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch @@ -0,0 +1,162 @@ +From cf7902eb8cf1b0096b2e1b6eceba972e7cc1c4bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 23:11:16 +0200 +Subject: sched/deadline: Fix dl_server getting stuck + +From: Peter Zijlstra + +commit 4ae8d9aa9f9dc7137ea5e564d79c5aa5af1bc45c upstream. + +John found it was easy to hit lockup warnings when running locktorture +on a 2 CPU VM, which he bisected down to: commit cccb45d7c429 +("sched/deadline: Less agressive dl_server handling"). + +While debugging it seems there is a chance where we end up with the +dl_server dequeued, with dl_se->dl_server_active. This causes +dl_server_start() to return without enqueueing the dl_server, thus it +fails to run when RT tasks starve the cpu. + +When this happens, dl_server_timer() catches the +'!dl_se->server_has_tasks(dl_se)' case, which then calls +replenish_dl_entity() and dl_server_stopped() and finally return +HRTIMER_NO_RESTART. + +This ends in no new timer and also no enqueue, leaving the dl_server +'dead', allowing starvation. + +What should have happened is for the bandwidth timer to start the +zero-laxity timer, which in turn would enqueue the dl_server and cause +dl_se->server_pick_task() to be called -- which will stop the +dl_server if no fair tasks are observed for a whole period. + +IOW, it is totally irrelevant if there are fair tasks at the moment of +bandwidth refresh. + +This removes all dl_se->server_has_tasks() users, so remove the whole +thing. 
+ +Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") +Reported-by: John Stultz +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: John Stultz +[ adjust renamed variable in fair_server_has_tasks (which this patch +removes) ] +Signed-off-by: Lukas Beckmann +Signed-off-by: Sasha Levin +--- + include/linux/sched.h | 1 - + kernel/sched/deadline.c | 12 +----------- + kernel/sched/fair.c | 7 +------ + kernel/sched/sched.h | 4 ---- + 4 files changed, 2 insertions(+), 22 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 299a65a92d2e6..464d281aa2e49 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -701,7 +701,6 @@ struct sched_dl_entity { + * runnable task. + */ + struct rq *rq; +- dl_server_has_tasks_f server_has_tasks; + dl_server_pick_f server_pick_task; + + #ifdef CONFIG_RT_MUTEXES +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index 6ff9055a69811..609783d7de290 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -916,7 +916,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) + */ + if (dl_se->dl_defer && !dl_se->dl_defer_running && + dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) { +- if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) { ++ if (!is_dl_boosted(dl_se)) { + + /* + * Set dl_se->dl_defer_armed and dl_throttled variables to +@@ -1201,8 +1201,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf) + /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */ + static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC; + +-static bool dl_server_stopped(struct sched_dl_entity *dl_se); +- + static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se) + { + struct rq *rq = rq_of_dl_se(dl_se); +@@ -1220,12 +1218,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct 
sched_ + if (!dl_se->dl_runtime) + return HRTIMER_NORESTART; + +- if (!dl_se->server_has_tasks(dl_se)) { +- replenish_dl_entity(dl_se); +- dl_server_stopped(dl_se); +- return HRTIMER_NORESTART; +- } +- + if (dl_se->dl_defer_armed) { + /* + * First check if the server could consume runtime in background. +@@ -1891,11 +1883,9 @@ static bool dl_server_stopped(struct sched_dl_entity *dl_se) + } + + void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, +- dl_server_has_tasks_f has_tasks, + dl_server_pick_f pick_task) + { + dl_se->rq = rq; +- dl_se->server_has_tasks = has_tasks; + dl_se->server_pick_task = pick_task; + } + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index d26e078d0623f..f36512892adf9 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -9058,11 +9058,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru + return pick_next_task_fair(rq, prev, NULL); + } + +-static bool fair_server_has_tasks(struct sched_dl_entity *dl_se) +-{ +- return !!dl_se->rq->cfs.nr_running; +-} +- + static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se) + { + return pick_task_fair(dl_se->rq); +@@ -9074,7 +9069,7 @@ void fair_server_init(struct rq *rq) + + init_dl_entity(dl_se); + +- dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task); ++ dl_server_init(dl_se, rq, fair_server_pick_task); + } + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index a09e2d25edd57..9391ff62cdaaa 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -371,9 +371,6 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6 + * + * dl_se::rq -- runqueue we belong to. + * +- * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the +- * server when it runs out of tasks to run. +- * + * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this + * returns NULL. 
+ * +@@ -389,7 +386,6 @@ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec); + extern void dl_server_start(struct sched_dl_entity *dl_se); + extern void dl_server_stop(struct sched_dl_entity *dl_se); + extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, +- dl_server_has_tasks_f has_tasks, + dl_server_pick_f pick_task); + + extern void dl_server_update_idle_time(struct rq *rq, +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-fix-dl_server_stopped.patch b/queue-6.12/sched-deadline-fix-dl_server_stopped.patch new file mode 100644 index 0000000000..114bcee846 --- /dev/null +++ b/queue-6.12/sched-deadline-fix-dl_server_stopped.patch @@ -0,0 +1,39 @@ +From 125f84b14921cf6acf104ac26f3ce38448538fa2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 23:11:14 +0200 +Subject: sched/deadline: Fix dl_server_stopped() + +From: Huacai Chen + +commit 4717432dfd99bbd015b6782adca216c6f9340038 upstream. + +Commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") +introduces dl_server_stopped(). But it is obvious that dl_server_stopped() +should return true if dl_se->dl_server_active is 0. 
+ +Fixes: cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") +Signed-off-by: Huacai Chen +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20250809130419.1980742-1-chenhuacai@loongson.cn +Signed-off-by: Lukas Beckmann +Signed-off-by: Sasha Levin +--- + kernel/sched/deadline.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index 9c5fa95b345a5..6ff9055a69811 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1879,7 +1879,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se) + static bool dl_server_stopped(struct sched_dl_entity *dl_se) + { + if (!dl_se->dl_server_active) +- return false; ++ return true; + + if (dl_se->dl_server_idle) { + dl_server_stop(dl_se); +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch b/queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch new file mode 100644 index 0000000000..770afbe0b9 --- /dev/null +++ b/queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch @@ -0,0 +1,165 @@ +From bf89db6d1042902e67f75e43ac5ce8105cf26c26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 23:11:13 +0200 +Subject: sched/deadline: Less agressive dl_server handling + +From: Peter Zijlstra + +commit cccb45d7c4295bbfeba616582d0249f2d21e6df5 upstream. + +Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default +bandwidth control") caused a significant dip in his favourite +benchmark of the day. Simply disabling dl_server cured things. + +His workload hammers the 0->1, 1->0 transitions, and the +dl_server_{start,stop}() overhead kills it -- fairly obviously a bad +idea in hind sight and all that. + +Change things around to only disable the dl_server when there has not +been a fair task around for a whole period. Since the default period +is 1 second, this ensures the benchmark never trips this, overhead +gone. 
+ +Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server") +Reported-by: Chris Mason +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Juri Lelli +Acked-by: Juri Lelli +Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org +[ adjust context for renamed/removed variable names ] +Signed-off-by: Lukas Beckmann +Signed-off-by: Sasha Levin +--- + include/linux/sched.h | 1 + + kernel/sched/deadline.c | 25 ++++++++++++++++++++++--- + kernel/sched/fair.c | 9 --------- + 3 files changed, 23 insertions(+), 12 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2e4c437c7c902..299a65a92d2e6 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -674,6 +674,7 @@ struct sched_dl_entity { + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; ++ unsigned int dl_server_idle : 1; + + /* + * Bandwidth enforcement timer. Each -deadline task has its +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index 1ef891f8e3f2f..9c5fa95b345a5 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1201,6 +1201,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf) + /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */ + static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC; + ++static bool dl_server_stopped(struct sched_dl_entity *dl_se); ++ + static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se) + { + struct rq *rq = rq_of_dl_se(dl_se); +@@ -1220,6 +1222,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_ + + if (!dl_se->server_has_tasks(dl_se)) { + replenish_dl_entity(dl_se); ++ dl_server_stopped(dl_se); + return HRTIMER_NORESTART; + } + +@@ -1626,8 +1629,10 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) + void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) + { + /* 0 runtime = fair 
server disabled */ +- if (dl_se->dl_runtime) ++ if (dl_se->dl_runtime) { ++ dl_se->dl_server_idle = 0; + update_curr_dl_se(dl_se->rq, dl_se, delta_exec); ++ } + } + + /* +@@ -1850,7 +1855,7 @@ void dl_server_start(struct sched_dl_entity *dl_se) + setup_new_dl_entity(dl_se); + } + +- if (!dl_se->dl_runtime) ++ if (!dl_se->dl_runtime || dl_se->dl_server_active) + return; + + dl_se->dl_server_active = 1; +@@ -1871,6 +1876,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se) + dl_se->dl_server_active = 0; + } + ++static bool dl_server_stopped(struct sched_dl_entity *dl_se) ++{ ++ if (!dl_se->dl_server_active) ++ return false; ++ ++ if (dl_se->dl_server_idle) { ++ dl_server_stop(dl_se); ++ return true; ++ } ++ ++ dl_se->dl_server_idle = 1; ++ return false; ++} ++ + void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, + dl_server_has_tasks_f has_tasks, + dl_server_pick_f pick_task) +@@ -2628,7 +2647,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq) + if (dl_server(dl_se)) { + p = dl_se->server_pick_task(dl_se); + if (!p) { +- if (dl_server_active(dl_se)) { ++ if (!dl_server_stopped(dl_se)) { + dl_se->dl_yielded = 1; + update_curr_dl_se(rq, dl_se, 0); + } +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index a0a47e50b71ca..d26e078d0623f 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5972,7 +5972,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + long queued_delta, runnable_delta, idle_task_delta, delayed_delta, dequeue = 1; +- long rq_h_nr_queued = rq->cfs.h_nr_queued; + + raw_spin_lock(&cfs_b->lock); + /* This will start the period timer if necessary */ +@@ -6059,10 +6058,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + + /* At this point se is NULL and we are at root level*/ + sub_nr_running(rq, queued_delta); +- +- /* Stop the fair server if throttling resulted in no runnable tasks */ +- if (rq_h_nr_queued && 
!rq->cfs.h_nr_queued) +- dl_server_stop(&rq->fair_server); + done: + /* + * Note: distribution will already see us throttled via the +@@ -7162,7 +7157,6 @@ static void set_next_buddy(struct sched_entity *se); + static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + { + bool was_sched_idle = sched_idle_rq(rq); +- int rq_h_nr_queued = rq->cfs.h_nr_queued; + bool task_sleep = flags & DEQUEUE_SLEEP; + bool task_delayed = flags & DEQUEUE_DELAYED; + struct task_struct *p = NULL; +@@ -7251,9 +7245,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + + sub_nr_running(rq, h_nr_queued); + +- if (rq_h_nr_queued && !rq->cfs.h_nr_queued) +- dl_server_stop(&rq->fair_server); +- + /* balance early to pull high priority tasks */ + if (unlikely(!was_sched_idle && sched_idle_rq(rq))) + rq->next_balance = jiffies; +-- +2.53.0 + diff --git a/queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch b/queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch new file mode 100644 index 0000000000..0aa5c8ea62 --- /dev/null +++ b/queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch @@ -0,0 +1,84 @@ +From 1d75df038698b9fa4534cdcdbd98588ea50c6c1f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Oct 2025 00:17:27 +0530 +Subject: sched/deadline: Stop dl_server before CPU goes offline + +From: Peter Zijlstra (Intel) + +[ Upstream commit ee6e44dfe6e50b4a5df853d933a96bdff5309e6e ] + +IBM CI tool reported kernel warning[1] when running a CPU removal +operation through drmgr[2]. 
i.e "drmgr -c cpu -r -q 1" + +WARNING: CPU: 0 PID: 0 at kernel/sched/cpudeadline.c:219 cpudl_set+0x58/0x170 +NIP [c0000000002b6ed8] cpudl_set+0x58/0x170 +LR [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 +Call Trace: +[c000000002c2f8c0] init_stack+0x78c0/0x8000 (unreliable) +[c0000000002b7cb8] dl_server_timer+0x168/0x2a0 +[c00000000034df84] __hrtimer_run_queues+0x1a4/0x390 +[c00000000034f624] hrtimer_interrupt+0x124/0x300 +[c00000000002a230] timer_interrupt+0x140/0x320 + +Git bisects to: commit 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") + +This happens since: +- dl_server hrtimer gets enqueued close to cpu offline, when + kthread_park enqueues a fair task. +- CPU goes offline and drmgr removes it from cpu_present_mask. +- hrtimer fires and warning is hit. + +Fix it by stopping the dl_server before CPU is marked dead. + +[1]: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com/ +[2]: https://github.com/ibm-power-utilities/powerpc-utils/tree/next/src/drmgr + +[sshegde: wrote the changelog and tested it] +Fixes: 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") +Closes: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com +Signed-off-by: Peter Zijlstra (Intel) +Reported-by: Venkat Rao Bagalkote +Signed-off-by: Shrikanth Hegde +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Marek Szyprowski +Tested-by: Shrikanth Hegde +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 ++ + kernel/sched/deadline.c | 3 +++ + 2 files changed, 5 insertions(+) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 9b238c9c71c67..1b1ddd24cb227 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8319,10 +8319,12 @@ int sched_cpu_dying(unsigned int cpu) + sched_tick_stop(cpu); + + rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); + if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { + WARN(true, "Dying CPU not properly vacated!"); + dump_rq_tasks(rq, KERN_WARNING); + } ++ 
dl_server_stop(&rq->fair_server); + rq_unlock_irqrestore(rq, &rf); + + calc_load_migrate(rq); +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index a6c699e43111d..cb8eff0ebd228 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1848,6 +1848,9 @@ void dl_server_start(struct sched_dl_entity *dl_se) + if (!dl_se->dl_runtime || dl_se->dl_server_active) + return; + ++ if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) ++ return; ++ + dl_se->dl_server_active = 1; + enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); + if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) +-- +2.53.0 + diff --git a/queue-6.12/series b/queue-6.12/series index b7ea8babb3..8f81d2a62d 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -11,3 +11,11 @@ revert-perf-python-add-parse_events-function.patch revert-perf-tool_pmu-factor-tool-events-into-their-o.patch bridge-mrp-reject-zero-test-interval-to-avoid-oom-pa.patch spi-spi-dw-dma-fix-print-error-log-when-wait-finish-.patch +revert-x86-vdso-fix-output-operand-size-of-rdpid.patch +sched-deadline-less-agressive-dl_server-handling.patch +sched-deadline-fix-dl_server_stopped.patch +sched-deadline-always-stop-dl-server-before-changing.patch +sched-deadline-fix-dl_server-getting-stuck.patch +sched-deadline-fix-dl_server-behaviour.patch +sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch +ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch diff --git a/queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch new file mode 100644 index 0000000000..e4466d3c83 --- /dev/null +++ b/queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch @@ -0,0 +1,260 @@ +From 9c19b2a43886dcf9c15e41d42b4558e71620556a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Apr 2026 23:08:56 +0900 +Subject: ksmbd: close durable scavenger races against m_fp_list lookups + +From: DaeMyung Kang + +[ Upstream 
commit bf736184d063da1a552ffeff0481813599a182cc ] + +ksmbd_durable_scavenger() has two related races against any walker +that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode() +(used by ksmbd_vfs_rename) and the share-mode checks in +fs/smb/server/smb_common.c. + +(1) fp->node list-head reuse. Durable-preserved handles can remain +linked on f_ci->m_fp_list after session teardown so share-mode checks +still see them while the handle is reconnectable. The scavenger +collected expired handles by adding fp->node to a local +scavenger_list after removing them from the global durable idr. +Because fp->node is the same list_head used by m_fp_list, +list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links +and corrupts both lists. CONFIG_DEBUG_LIST can report this on the +share-mode walk path. + +(2) Refcount race against m_fp_list walkers. The scavenger qualifies +an expired durable handle with atomic_read(&fp->refcount) > 1 and +fp->conn under global_ft.lock, removes fp from global_ft, then drops +global_ft.lock before unlinking fp from m_fp_list and freeing it. +During that gap fp is still linked on m_fp_list with f_state == +FP_INITED. ksmbd_lookup_fd_inode() under m_lock read calls +ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and +takes a live reference; the scavenger then unlinks and frees fp +while the holder owns a reference, leading to UAF on the holder's +subsequent ksmbd_fd_put() and on any field reads performed by a +concurrent share-mode walker that iterates m_fp_list without taking +ksmbd_fp_get() (smb_check_perm_dleases-like paths). + +Fix both: + + * Stop reusing fp->node as a scavenger-private list node. Remove + one expired handle from global_ft under global_ft.lock, take an + explicit transient reference, drop the lock, unlink fp->node + from m_fp_list under f_ci->m_lock, then drop both the durable + lifetime and transient references with atomic_sub_and_test(2, + &fp->refcount). 
If the scavenger is the last putter the close + runs there; otherwise an in-flight holder that already raced + through the m_fp_list lookup owns the final close via its + ksmbd_fd_put() path. The one-at-a-time disposal can rescan the + durable idr when multiple handles expire in the same pass, but + durable scavenging is a background expiration path and the final + full scan recomputes min_timeout before the next wait. + + * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right + after idr_remove(), so a delayed final close from a holder that + snatched fp does not re-issue idr_remove() on a persistent id + that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have + already handed out to a brand-new durable handle. + + * Bypass the per-conn open_files_count decrement in + __put_fd_final() when fp is detached from any session table + (fp->conn cleared by session_fd_check() at durable preserve -- + paired with the volatile_id clear at unpublish, so checking + fp->conn alone is sufficient). The walker that owns the final + close runs from an unrelated work->conn whose + stats.open_files_count never tracked this durable fp; without + this guard the holder would underflow that unrelated counter. + +The two races are folded into one patch because patch (1) alone +cleans up the corrupted list but leaves a deterministic UAF window +for m_fp_list walkers that the transient-reference and +persistent_id discipline in (2) close; bisecting onto an +intermediate state would land on a UAF that pre-patch chaos merely +made less reproducible. + +Validation: + * CONFIG_DEBUG_LIST coverage for the list_head reuse path. + * KASAN-enabled direct SMB2 durable-handle coverage that exercised + ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode() + returns while durable handles expired under concurrent rename + lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING + reports. 
+ * checkpatch --strict + * make -j$(nproc) M=fs/smb/server + +Fixes: d484d621d40f ("ksmbd: add durable scavenger timer") +Signed-off-by: DaeMyung Kang +Acked-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/server/vfs_cache.c | 104 ++++++++++++++++++++++++++++---------- + 1 file changed, 77 insertions(+), 27 deletions(-) + +diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c +index d29cc1d01bd2c..a8fed467e9b69 100644 +--- a/fs/smb/server/vfs_cache.c ++++ b/fs/smb/server/vfs_cache.c +@@ -325,6 +325,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) + return; + + idr_remove(global_ft.idr, fp->persistent_id); ++ /* ++ * Clear persistent_id so a later __ksmbd_close_fd() that runs from a ++ * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() ++ * walker held the final reference) does not re-issue idr_remove() on ++ * an id that idr_alloc_cyclic() may have already handed out to a new ++ * durable handle. ++ */ ++ fp->persistent_id = KSMBD_NO_FID; + } + + static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) +@@ -417,6 +425,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, + + static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) + { ++ /* ++ * Detached durable fp -- session_fd_check() cleared fp->conn at ++ * preserve, so this fp is no longer tracked by any conn's ++ * stats.open_files_count. This happens when ++ * ksmbd_scavenger_dispose_dh() hands the final close off to an ++ * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn ++ * is unrelated to the conn that originally opened the handle; close ++ * via the NULL-ft path so we do not underflow that unrelated ++ * counter. 
++ */ ++ if (!fp->conn) { ++ __ksmbd_close_fd(NULL, fp); ++ return; ++ } + __ksmbd_close_fd(&work->sess->file_table, fp); + atomic_dec(&work->conn->stats.open_files_count); + } +@@ -788,24 +810,37 @@ static bool ksmbd_durable_scavenger_alive(void) + return true; + } + +-static void ksmbd_scavenger_dispose_dh(struct list_head *head) ++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) + { +- while (!list_empty(head)) { +- struct ksmbd_file *fp; ++ /* ++ * Durable-preserved fp can remain linked on f_ci->m_fp_list for ++ * share-mode checks. Unlink it before final close; fp->node is not ++ * available as a scavenger-private list node because re-adding it to ++ * another list corrupts m_fp_list. ++ */ ++ down_write(&fp->f_ci->m_lock); ++ list_del_init(&fp->node); ++ up_write(&fp->f_ci->m_lock); + +- fp = list_first_entry(head, struct ksmbd_file, node); +- list_del_init(&fp->node); ++ /* ++ * Drop both the durable lifetime reference and the transient reference ++ * taken by the scavenger under global_ft.lock. If a concurrent ++ * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp ++ * before the unlink above, that holder owns the final close via ++ * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is ++ * the last putter and finalises fp here. 
++ */ ++ if (atomic_sub_and_test(2, &fp->refcount)) + __ksmbd_close_fd(NULL, fp); +- } + } + + static int ksmbd_durable_scavenger(void *dummy) + { + struct ksmbd_file *fp = NULL; ++ struct ksmbd_file *expired_fp; + unsigned int id; + unsigned int min_timeout = 1; + bool found_fp_timeout; +- LIST_HEAD(scavenger_list); + unsigned long remaining_jiffies; + + __module_get(THIS_MODULE); +@@ -815,8 +850,6 @@ static int ksmbd_durable_scavenger(void *dummy) + if (try_to_freeze()) + continue; + +- found_fp_timeout = false; +- + remaining_jiffies = wait_event_timeout(dh_wq, + ksmbd_durable_scavenger_alive() == false, + __msecs_to_jiffies(min_timeout)); +@@ -825,23 +858,39 @@ static int ksmbd_durable_scavenger(void *dummy) + else + min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; + +- write_lock(&global_ft.lock); +- idr_for_each_entry(global_ft.idr, fp, id) { +- if (!fp->durable_timeout) +- continue; +- +- if (atomic_read(&fp->refcount) > 1 || +- fp->conn) +- continue; +- +- found_fp_timeout = true; +- if (fp->durable_scavenger_timeout <= +- jiffies_to_msecs(jiffies)) { +- __ksmbd_remove_durable_fd(fp); +- list_add(&fp->node, &scavenger_list); +- } else { ++ do { ++ expired_fp = NULL; ++ found_fp_timeout = false; ++ ++ write_lock(&global_ft.lock); ++ idr_for_each_entry(global_ft.idr, fp, id) { + unsigned long durable_timeout; + ++ if (!fp->durable_timeout) ++ continue; ++ ++ if (atomic_read(&fp->refcount) > 1 || ++ fp->conn) ++ continue; ++ ++ found_fp_timeout = true; ++ if (fp->durable_scavenger_timeout <= ++ jiffies_to_msecs(jiffies)) { ++ __ksmbd_remove_durable_fd(fp); ++ /* ++ * Take a transient reference so fp ++ * cannot be freed by an in-flight ++ * ksmbd_lookup_fd_inode() that found ++ * it through f_ci->m_fp_list while we ++ * drop global_ft.lock and reach the ++ * m_fp_list unlink in ++ * ksmbd_scavenger_dispose_dh(). 
++ */ ++ atomic_inc(&fp->refcount); ++ expired_fp = fp; ++ break; ++ } ++ + durable_timeout = + fp->durable_scavenger_timeout - + jiffies_to_msecs(jiffies); +@@ -849,10 +898,11 @@ static int ksmbd_durable_scavenger(void *dummy) + if (min_timeout > durable_timeout) + min_timeout = durable_timeout; + } +- } +- write_unlock(&global_ft.lock); ++ write_unlock(&global_ft.lock); + +- ksmbd_scavenger_dispose_dh(&scavenger_list); ++ if (expired_fp) ++ ksmbd_scavenger_dispose_dh(expired_fp); ++ } while (expired_fp); + + if (found_fp_timeout == false) + break; +-- +2.53.0 + diff --git a/queue-6.18/series b/queue-6.18/series index d35c0223a6..1a26a0def3 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -7,3 +7,4 @@ sched-employ-sched_change-guards.patch sched-deadline-fix-missing-enqueue_replenish-during-.patch bridge-mrp-reject-zero-test-interval-to-avoid-oom-pa.patch spi-spi-dw-dma-fix-print-error-log-when-wait-finish-.patch +ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch diff --git a/queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch new file mode 100644 index 0000000000..0755872f52 --- /dev/null +++ b/queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch @@ -0,0 +1,42 @@ +From c01dc464b7f0e8ef2a7b98a38a3a71ee049fc61c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 May 2026 20:45:55 -0400 +Subject: Revert "x86/vdso: Fix output operand size of RDPID" + +This reverts commit db82a94b0c109e7697a593f36783e7ef36073983. 
+ +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/segment.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h +index 00cefbb59fa98..9d6411c659205 100644 +--- a/arch/x86/include/asm/segment.h ++++ b/arch/x86/include/asm/segment.h +@@ -244,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) + + static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + { +- unsigned long p; ++ unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP +@@ -254,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) + * + * If RDPID is available, use it. + */ +- alternative_io ("lsl %[seg],%k[p]", +- "rdpid %[p]", ++ alternative_io ("lsl %[seg],%[p]", ++ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, +- [p] "=r" (p), [seg] "r" (__CPUNODE_SEG)); ++ [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); +-- +2.53.0 + diff --git a/queue-6.6/series b/queue-6.6/series index a6d241119c..88e063ebc2 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -7,3 +7,4 @@ driver-core-generalize-driver_override-in-struct-dev.patch driver-core-platform-use-generic-driver_override-inf.patch s390-debug-reject-zero-length-input-before-trimming-.patch wifi-mac80211-check-tdls-flag-in-ieee80211_tdls_oper.patch +revert-x86-vdso-fix-output-operand-size-of-rdpid.patch diff --git a/queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch new file mode 100644 index 0000000000..456cb0c031 --- /dev/null +++ b/queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch @@ -0,0 +1,259 @@ +From 5163e728f460c9baa126d1f0bf14beefd3a9fb9d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Apr 2026 23:08:56 +0900 +Subject: ksmbd: close durable scavenger races 
against m_fp_list lookups + +From: DaeMyung Kang + +[ Upstream commit bf736184d063da1a552ffeff0481813599a182cc ] + +ksmbd_durable_scavenger() has two related races against any walker +that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode() +(used by ksmbd_vfs_rename) and the share-mode checks in +fs/smb/server/smb_common.c. + +(1) fp->node list-head reuse. Durable-preserved handles can remain +linked on f_ci->m_fp_list after session teardown so share-mode checks +still see them while the handle is reconnectable. The scavenger +collected expired handles by adding fp->node to a local +scavenger_list after removing them from the global durable idr. +Because fp->node is the same list_head used by m_fp_list, +list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links +and corrupts both lists. CONFIG_DEBUG_LIST can report this on the +share-mode walk path. + +(2) Refcount race against m_fp_list walkers. The scavenger qualifies +an expired durable handle with atomic_read(&fp->refcount) > 1 and +fp->conn under global_ft.lock, removes fp from global_ft, then drops +global_ft.lock before unlinking fp from m_fp_list and freeing it. +During that gap fp is still linked on m_fp_list with f_state == +FP_INITED. ksmbd_lookup_fd_inode() under m_lock read calls +ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and +takes a live reference; the scavenger then unlinks and frees fp +while the holder owns a reference, leading to UAF on the holder's +subsequent ksmbd_fd_put() and on any field reads performed by a +concurrent share-mode walker that iterates m_fp_list without taking +ksmbd_fp_get() (smb_check_perm_dleases-like paths). + +Fix both: + + * Stop reusing fp->node as a scavenger-private list node. 
Remove + one expired handle from global_ft under global_ft.lock, take an + explicit transient reference, drop the lock, unlink fp->node + from m_fp_list under f_ci->m_lock, then drop both the durable + lifetime and transient references with atomic_sub_and_test(2, + &fp->refcount). If the scavenger is the last putter the close + runs there; otherwise an in-flight holder that already raced + through the m_fp_list lookup owns the final close via its + ksmbd_fd_put() path. The one-at-a-time disposal can rescan the + durable idr when multiple handles expire in the same pass, but + durable scavenging is a background expiration path and the final + full scan recomputes min_timeout before the next wait. + + * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right + after idr_remove(), so a delayed final close from a holder that + snatched fp does not re-issue idr_remove() on a persistent id + that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have + already handed out to a brand-new durable handle. + + * Bypass the per-conn open_files_count decrement in + __put_fd_final() when fp is detached from any session table + (fp->conn cleared by session_fd_check() at durable preserve -- + paired with the volatile_id clear at unpublish, so checking + fp->conn alone is sufficient). The walker that owns the final + close runs from an unrelated work->conn whose + stats.open_files_count never tracked this durable fp; without + this guard the holder would underflow that unrelated counter. + +The two races are folded into one patch because patch (1) alone +cleans up the corrupted list but leaves a deterministic UAF window +for m_fp_list walkers that the transient-reference and +persistent_id discipline in (2) close; bisecting onto an +intermediate state would land on a UAF that pre-patch chaos merely +made less reproducible. + +Validation: + * CONFIG_DEBUG_LIST coverage for the list_head reuse path. 
+ * KASAN-enabled direct SMB2 durable-handle coverage that exercised + ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode() + returns while durable handles expired under concurrent rename + lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING + reports. + * checkpatch --strict + * make -j$(nproc) M=fs/smb/server + +Fixes: d484d621d40f ("ksmbd: add durable scavenger timer") +Signed-off-by: DaeMyung Kang +Acked-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/server/vfs_cache.c | 102 ++++++++++++++++++++++++++++---------- + 1 file changed, 76 insertions(+), 26 deletions(-) + +diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c +index 3551f01a3fa03..60b7a2d60238a 100644 +--- a/fs/smb/server/vfs_cache.c ++++ b/fs/smb/server/vfs_cache.c +@@ -418,6 +418,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) + return; + + idr_remove(global_ft.idr, fp->persistent_id); ++ /* ++ * Clear persistent_id so a later __ksmbd_close_fd() that runs from a ++ * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() ++ * walker held the final reference) does not re-issue idr_remove() on ++ * an id that idr_alloc_cyclic() may have already handed out to a new ++ * durable handle. ++ */ ++ fp->persistent_id = KSMBD_NO_FID; + } + + static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) +@@ -510,6 +518,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, + + static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) + { ++ /* ++ * Detached durable fp -- session_fd_check() cleared fp->conn at ++ * preserve, so this fp is no longer tracked by any conn's ++ * stats.open_files_count. This happens when ++ * ksmbd_scavenger_dispose_dh() hands the final close off to an ++ * m_fp_list walker (e.g. 
ksmbd_lookup_fd_inode()) whose work->conn ++ * is unrelated to the conn that originally opened the handle; close ++ * via the NULL-ft path so we do not underflow that unrelated ++ * counter. ++ */ ++ if (!fp->conn) { ++ __ksmbd_close_fd(NULL, fp); ++ return; ++ } + __ksmbd_close_fd(&work->sess->file_table, fp); + atomic_dec(&work->conn->stats.open_files_count); + } +@@ -881,24 +903,37 @@ static bool ksmbd_durable_scavenger_alive(void) + return true; + } + +-static void ksmbd_scavenger_dispose_dh(struct list_head *head) ++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) + { +- while (!list_empty(head)) { +- struct ksmbd_file *fp; ++ /* ++ * Durable-preserved fp can remain linked on f_ci->m_fp_list for ++ * share-mode checks. Unlink it before final close; fp->node is not ++ * available as a scavenger-private list node because re-adding it to ++ * another list corrupts m_fp_list. ++ */ ++ down_write(&fp->f_ci->m_lock); ++ list_del_init(&fp->node); ++ up_write(&fp->f_ci->m_lock); + +- fp = list_first_entry(head, struct ksmbd_file, node); +- list_del_init(&fp->node); ++ /* ++ * Drop both the durable lifetime reference and the transient reference ++ * taken by the scavenger under global_ft.lock. If a concurrent ++ * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp ++ * before the unlink above, that holder owns the final close via ++ * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is ++ * the last putter and finalises fp here. 
++ */ ++ if (atomic_sub_and_test(2, &fp->refcount)) + __ksmbd_close_fd(NULL, fp); +- } + } + + static int ksmbd_durable_scavenger(void *dummy) + { + struct ksmbd_file *fp = NULL; ++ struct ksmbd_file *expired_fp; + unsigned int id; + unsigned int min_timeout = 1; + bool found_fp_timeout; +- LIST_HEAD(scavenger_list); + unsigned long remaining_jiffies; + + __module_get(THIS_MODULE); +@@ -908,8 +943,6 @@ static int ksmbd_durable_scavenger(void *dummy) + if (try_to_freeze()) + continue; + +- found_fp_timeout = false; +- + remaining_jiffies = wait_event_timeout(dh_wq, + ksmbd_durable_scavenger_alive() == false, + __msecs_to_jiffies(min_timeout)); +@@ -918,23 +951,39 @@ static int ksmbd_durable_scavenger(void *dummy) + else + min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; + +- write_lock(&global_ft.lock); +- idr_for_each_entry(global_ft.idr, fp, id) { +- if (!fp->durable_timeout) +- continue; ++ do { ++ expired_fp = NULL; ++ found_fp_timeout = false; + +- if (atomic_read(&fp->refcount) > 1 || +- fp->conn) +- continue; +- +- found_fp_timeout = true; +- if (fp->durable_scavenger_timeout <= +- jiffies_to_msecs(jiffies)) { +- __ksmbd_remove_durable_fd(fp); +- list_add(&fp->node, &scavenger_list); +- } else { ++ write_lock(&global_ft.lock); ++ idr_for_each_entry(global_ft.idr, fp, id) { + unsigned long durable_timeout; + ++ if (!fp->durable_timeout) ++ continue; ++ ++ if (atomic_read(&fp->refcount) > 1 || ++ fp->conn) ++ continue; ++ ++ found_fp_timeout = true; ++ if (fp->durable_scavenger_timeout <= ++ jiffies_to_msecs(jiffies)) { ++ __ksmbd_remove_durable_fd(fp); ++ /* ++ * Take a transient reference so fp ++ * cannot be freed by an in-flight ++ * ksmbd_lookup_fd_inode() that found ++ * it through f_ci->m_fp_list while we ++ * drop global_ft.lock and reach the ++ * m_fp_list unlink in ++ * ksmbd_scavenger_dispose_dh(). 
++ */ ++ atomic_inc(&fp->refcount); ++ expired_fp = fp; ++ break; ++ } ++ + durable_timeout = + fp->durable_scavenger_timeout - + jiffies_to_msecs(jiffies); +@@ -942,10 +991,11 @@ static int ksmbd_durable_scavenger(void *dummy) + if (min_timeout > durable_timeout) + min_timeout = durable_timeout; + } +- } +- write_unlock(&global_ft.lock); ++ write_unlock(&global_ft.lock); + +- ksmbd_scavenger_dispose_dh(&scavenger_list); ++ if (expired_fp) ++ ksmbd_scavenger_dispose_dh(expired_fp); ++ } while (expired_fp); + + if (found_fp_timeout == false) + break; +-- +2.53.0 + diff --git a/queue-7.0/series b/queue-7.0/series index d7baa45de2..c78f71d8a4 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -1,2 +1,3 @@ iommu-amd-fix-illegal-cap-mmio-access-in-iommu-debug.patch iommu-amd-remove-latent-out-of-bounds-access-in-iomm.patch +ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch