]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for all trees
authorSasha Levin <sashal@kernel.org>
Tue, 26 May 2026 11:34:37 +0000 (07:34 -0400)
committerSasha Levin <sashal@kernel.org>
Tue, 26 May 2026 11:34:37 +0000 (07:34 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
17 files changed:
queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch [new file with mode: 0644]
queue-6.1/series
queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch [new file with mode: 0644]
queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch [new file with mode: 0644]
queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch [new file with mode: 0644]
queue-6.12/sched-deadline-fix-dl_server-behaviour.patch [new file with mode: 0644]
queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch [new file with mode: 0644]
queue-6.12/sched-deadline-fix-dl_server_stopped.patch [new file with mode: 0644]
queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch [new file with mode: 0644]
queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch [new file with mode: 0644]
queue-6.12/series
queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch [new file with mode: 0644]
queue-6.18/series
queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch [new file with mode: 0644]
queue-6.6/series
queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch [new file with mode: 0644]
queue-7.0/series

diff --git a/queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.1/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
new file mode 100644 (file)
index 0000000..6b3cecc
--- /dev/null
@@ -0,0 +1,42 @@
+From 419a3a65767d3f679f6ca7369fc3d5a0e83832a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 20:46:05 -0400
+Subject: Revert "x86/vdso: Fix output operand size of RDPID"
+
+This reverts commit 757a9e78a1c5b824d0a2b7de14c3cd8d841dfbee.
+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/segment.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
+index 7865f180eb087..2e7890dd58a47 100644
+--- a/arch/x86/include/asm/segment.h
++++ b/arch/x86/include/asm/segment.h
+@@ -243,7 +243,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
+ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+ {
+-      unsigned long p;
++      unsigned int p;
+       /*
+        * Load CPU and node number from the GDT.  LSL is faster than RDTSCP
+@@ -253,10 +253,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+        *
+        * If RDPID is available, use it.
+        */
+-      alternative_io ("lsl %[seg],%k[p]",
+-                      "rdpid %[p]",
++      alternative_io ("lsl %[seg],%[p]",
++                      ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                       X86_FEATURE_RDPID,
+-                      [p] "=r" (p), [seg] "r" (__CPUNODE_SEG));
++                      [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+       if (cpu)
+               *cpu = (p & VDSO_CPUNODE_MASK);
+-- 
+2.53.0
+
index 2d89ee749e10acd76907eb53efef2a2c8f494ad3..55ebc556d70bb69c16734722494e33f26781cff4 100644 (file)
@@ -833,3 +833,4 @@ net-rds-reset-op_nents-when-zerocopy-page-pin-fails.patch
 io_uring-prevent-opcode-speculation.patch
 s390-debug-reject-zero-length-input-before-trimming-.patch
 wifi-mac80211-check-tdls-flag-in-ieee80211_tdls_oper.patch
+revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
diff --git a/queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-6.12/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch
new file mode 100644 (file)
index 0000000..075a253
--- /dev/null
@@ -0,0 +1,260 @@
+From b2ecb0aacf48f499ce941e3aff0841f6ff44b069 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Apr 2026 23:08:56 +0900
+Subject: ksmbd: close durable scavenger races against m_fp_list lookups
+
+From: DaeMyung Kang <charsyam@gmail.com>
+
+[ Upstream commit bf736184d063da1a552ffeff0481813599a182cc ]
+
+ksmbd_durable_scavenger() has two related races against any walker
+that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode()
+(used by ksmbd_vfs_rename) and the share-mode checks in
+fs/smb/server/smb_common.c.
+
+(1) fp->node list-head reuse.  Durable-preserved handles can remain
+linked on f_ci->m_fp_list after session teardown so share-mode checks
+still see them while the handle is reconnectable.  The scavenger
+collected expired handles by adding fp->node to a local
+scavenger_list after removing them from the global durable idr.
+Because fp->node is the same list_head used by m_fp_list,
+list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links
+and corrupts both lists.  CONFIG_DEBUG_LIST can report this on the
+share-mode walk path.
+
+(2) Refcount race against m_fp_list walkers.  The scavenger qualifies
+an expired durable handle with atomic_read(&fp->refcount) > 1 and
+fp->conn under global_ft.lock, removes fp from global_ft, then drops
+global_ft.lock before unlinking fp from m_fp_list and freeing it.
+During that gap fp is still linked on m_fp_list with f_state ==
+FP_INITED.  ksmbd_lookup_fd_inode() under m_lock read calls
+ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and
+takes a live reference; the scavenger then unlinks and frees fp
+while the holder owns a reference, leading to UAF on the holder's
+subsequent ksmbd_fd_put() and on any field reads performed by a
+concurrent share-mode walker that iterates m_fp_list without taking
+ksmbd_fp_get() (smb_check_perm_dleases-like paths).
+
+Fix both:
+
+  * Stop reusing fp->node as a scavenger-private list node.  Remove
+    one expired handle from global_ft under global_ft.lock, take an
+    explicit transient reference, drop the lock, unlink fp->node
+    from m_fp_list under f_ci->m_lock, then drop both the durable
+    lifetime and transient references with atomic_sub_and_test(2,
+    &fp->refcount).  If the scavenger is the last putter the close
+    runs there; otherwise an in-flight holder that already raced
+    through the m_fp_list lookup owns the final close via its
+    ksmbd_fd_put() path.  The one-at-a-time disposal can rescan the
+    durable idr when multiple handles expire in the same pass, but
+    durable scavenging is a background expiration path and the final
+    full scan recomputes min_timeout before the next wait.
+
+  * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right
+    after idr_remove(), so a delayed final close from a holder that
+    snatched fp does not re-issue idr_remove() on a persistent id
+    that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have
+    already handed out to a brand-new durable handle.
+
+  * Bypass the per-conn open_files_count decrement in
+    __put_fd_final() when fp is detached from any session table
+    (fp->conn cleared by session_fd_check() at durable preserve --
+    paired with the volatile_id clear at unpublish, so checking
+    fp->conn alone is sufficient).  The walker that owns the final
+    close runs from an unrelated work->conn whose
+    stats.open_files_count never tracked this durable fp; without
+    this guard the holder would underflow that unrelated counter.
+
+The two races are folded into one patch because patch (1) alone
+cleans up the corrupted list but leaves a deterministic UAF window
+for m_fp_list walkers that the transient-reference and
+persistent_id discipline in (2) close; bisecting onto an
+intermediate state would land on a UAF that pre-patch chaos merely
+made less reproducible.
+
+Validation:
+  * CONFIG_DEBUG_LIST coverage for the list_head reuse path.
+  * KASAN-enabled direct SMB2 durable-handle coverage that exercised
+    ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode()
+    returns while durable handles expired under concurrent rename
+    lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING
+    reports.
+  * checkpatch --strict
+  * make -j$(nproc) M=fs/smb/server
+
+Fixes: d484d621d40f ("ksmbd: add durable scavenger timer")
+Signed-off-by: DaeMyung Kang <charsyam@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/vfs_cache.c | 104 ++++++++++++++++++++++++++++----------
+ 1 file changed, 77 insertions(+), 27 deletions(-)
+
+diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
+index d29cc1d01bd2c..a8fed467e9b69 100644
+--- a/fs/smb/server/vfs_cache.c
++++ b/fs/smb/server/vfs_cache.c
+@@ -325,6 +325,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+               return;
+       idr_remove(global_ft.idr, fp->persistent_id);
++      /*
++       * Clear persistent_id so a later __ksmbd_close_fd() that runs from a
++       * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode()
++       * walker held the final reference) does not re-issue idr_remove() on
++       * an id that idr_alloc_cyclic() may have already handed out to a new
++       * durable handle.
++       */
++      fp->persistent_id = KSMBD_NO_FID;
+ }
+ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+@@ -417,6 +425,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft,
+ static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
+ {
++      /*
++       * Detached durable fp -- session_fd_check() cleared fp->conn at
++       * preserve, so this fp is no longer tracked by any conn's
++       * stats.open_files_count.  This happens when
++       * ksmbd_scavenger_dispose_dh() hands the final close off to an
++       * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn
++       * is unrelated to the conn that originally opened the handle; close
++       * via the NULL-ft path so we do not underflow that unrelated
++       * counter.
++       */
++      if (!fp->conn) {
++              __ksmbd_close_fd(NULL, fp);
++              return;
++      }
+       __ksmbd_close_fd(&work->sess->file_table, fp);
+       atomic_dec(&work->conn->stats.open_files_count);
+ }
+@@ -788,24 +810,37 @@ static bool ksmbd_durable_scavenger_alive(void)
+       return true;
+ }
+-static void ksmbd_scavenger_dispose_dh(struct list_head *head)
++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp)
+ {
+-      while (!list_empty(head)) {
+-              struct ksmbd_file *fp;
++      /*
++       * Durable-preserved fp can remain linked on f_ci->m_fp_list for
++       * share-mode checks.  Unlink it before final close; fp->node is not
++       * available as a scavenger-private list node because re-adding it to
++       * another list corrupts m_fp_list.
++       */
++      down_write(&fp->f_ci->m_lock);
++      list_del_init(&fp->node);
++      up_write(&fp->f_ci->m_lock);
+-              fp = list_first_entry(head, struct ksmbd_file, node);
+-              list_del_init(&fp->node);
++      /*
++       * Drop both the durable lifetime reference and the transient reference
++       * taken by the scavenger under global_ft.lock.  If a concurrent
++       * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp
++       * before the unlink above, that holder owns the final close via
++       * ksmbd_fd_put() -> __ksmbd_close_fd().  Otherwise the scavenger is
++       * the last putter and finalises fp here.
++       */
++      if (atomic_sub_and_test(2, &fp->refcount))
+               __ksmbd_close_fd(NULL, fp);
+-      }
+ }
+ static int ksmbd_durable_scavenger(void *dummy)
+ {
+       struct ksmbd_file *fp = NULL;
++      struct ksmbd_file *expired_fp;
+       unsigned int id;
+       unsigned int min_timeout = 1;
+       bool found_fp_timeout;
+-      LIST_HEAD(scavenger_list);
+       unsigned long remaining_jiffies;
+       __module_get(THIS_MODULE);
+@@ -815,8 +850,6 @@ static int ksmbd_durable_scavenger(void *dummy)
+               if (try_to_freeze())
+                       continue;
+-              found_fp_timeout = false;
+-
+               remaining_jiffies = wait_event_timeout(dh_wq,
+                                  ksmbd_durable_scavenger_alive() == false,
+                                  __msecs_to_jiffies(min_timeout));
+@@ -825,23 +858,39 @@ static int ksmbd_durable_scavenger(void *dummy)
+               else
+                       min_timeout = DURABLE_HANDLE_MAX_TIMEOUT;
+-              write_lock(&global_ft.lock);
+-              idr_for_each_entry(global_ft.idr, fp, id) {
+-                      if (!fp->durable_timeout)
+-                              continue;
+-
+-                      if (atomic_read(&fp->refcount) > 1 ||
+-                          fp->conn)
+-                              continue;
+-
+-                      found_fp_timeout = true;
+-                      if (fp->durable_scavenger_timeout <=
+-                          jiffies_to_msecs(jiffies)) {
+-                              __ksmbd_remove_durable_fd(fp);
+-                              list_add(&fp->node, &scavenger_list);
+-                      } else {
++              do {
++                      expired_fp = NULL;
++                      found_fp_timeout = false;
++
++                      write_lock(&global_ft.lock);
++                      idr_for_each_entry(global_ft.idr, fp, id) {
+                               unsigned long durable_timeout;
++                              if (!fp->durable_timeout)
++                                      continue;
++
++                              if (atomic_read(&fp->refcount) > 1 ||
++                                  fp->conn)
++                                      continue;
++
++                              found_fp_timeout = true;
++                              if (fp->durable_scavenger_timeout <=
++                                  jiffies_to_msecs(jiffies)) {
++                                      __ksmbd_remove_durable_fd(fp);
++                                      /*
++                                       * Take a transient reference so fp
++                                       * cannot be freed by an in-flight
++                                       * ksmbd_lookup_fd_inode() that found
++                                       * it through f_ci->m_fp_list while we
++                                       * drop global_ft.lock and reach the
++                                       * m_fp_list unlink in
++                                       * ksmbd_scavenger_dispose_dh().
++                                       */
++                                      atomic_inc(&fp->refcount);
++                                      expired_fp = fp;
++                                      break;
++                              }
++
+                               durable_timeout =
+                                       fp->durable_scavenger_timeout -
+                                               jiffies_to_msecs(jiffies);
+@@ -849,10 +898,11 @@ static int ksmbd_durable_scavenger(void *dummy)
+                               if (min_timeout > durable_timeout)
+                                       min_timeout = durable_timeout;
+                       }
+-              }
+-              write_unlock(&global_ft.lock);
++                      write_unlock(&global_ft.lock);
+-              ksmbd_scavenger_dispose_dh(&scavenger_list);
++                      if (expired_fp)
++                              ksmbd_scavenger_dispose_dh(expired_fp);
++              } while (expired_fp);
+               if (found_fp_timeout == false)
+                       break;
+-- 
+2.53.0
+
diff --git a/queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.12/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
new file mode 100644 (file)
index 0000000..001fe8f
--- /dev/null
@@ -0,0 +1,42 @@
+From b33550234deaf50dfd7a45dd61c74481a920c21a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 20:45:41 -0400
+Subject: Revert "x86/vdso: Fix output operand size of RDPID"
+
+This reverts commit d607e6b349b014df1d2d0399f6667322626450e0.
+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/segment.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
+index 00cefbb59fa98..9d6411c659205 100644
+--- a/arch/x86/include/asm/segment.h
++++ b/arch/x86/include/asm/segment.h
+@@ -244,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
+ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+ {
+-      unsigned long p;
++      unsigned int p;
+       /*
+        * Load CPU and node number from the GDT.  LSL is faster than RDTSCP
+@@ -254,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+        *
+        * If RDPID is available, use it.
+        */
+-      alternative_io ("lsl %[seg],%k[p]",
+-                      "rdpid %[p]",
++      alternative_io ("lsl %[seg],%[p]",
++                      ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                       X86_FEATURE_RDPID,
+-                      [p] "=r" (p), [seg] "r" (__CPUNODE_SEG));
++                      [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+       if (cpu)
+               *cpu = (p & VDSO_CPUNODE_MASK);
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch b/queue-6.12/sched-deadline-always-stop-dl-server-before-changing.patch
new file mode 100644 (file)
index 0000000..3d12227
--- /dev/null
@@ -0,0 +1,53 @@
+From e9c6d98eed1da1a5e7d1c7e416b262bc9a26c0e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 23:11:15 +0200
+Subject: sched/deadline: Always stop dl-server before changing parameters
+
+From: Juri Lelli <juri.lelli@redhat.com>
+
+commit bb4700adc3abec34c0a38b64f66258e4e233fc16 upstream.
+
+Commit cccb45d7c4295 ("sched/deadline: Less agressive dl_server
+handling") reduced dl-server overhead by delaying disabling servers only
+after there are no fair tasks around for a whole period, which means that
+deadline entities are not dequeued right away on a server stop event.
+However, the delay opens up a window in which a request for changing
+server parameters can break per-runqueue running_bw tracking, as
+reported by Yuri.
+
+Close the problematic window by unconditionally calling dl_server_stop()
+before applying the new parameters (ensuring deadline entities go
+through an actual dequeue).
+
+Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
+Reported-by: Yuri Andriaccio <yurand2000@gmail.com>
+Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Link: https://lore.kernel.org/r/20250721-upstream-fix-dlserver-lessaggressive-b4-v1-1-4ebc10c87e40@redhat.com
+Signed-off-by: Lukas Beckmann <lbckmnn@mailbox.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/debug.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index 7d14e9fa53ac3..564ea17ae405e 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -378,10 +378,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
+                       return  -EINVAL;
+               }
+-              if (rq->cfs.h_nr_queued) {
+-                      update_rq_clock(rq);
+-                      dl_server_stop(&rq->fair_server);
+-              }
++              update_rq_clock(rq);
++              dl_server_stop(&rq->fair_server);
+               retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-fix-dl_server-behaviour.patch b/queue-6.12/sched-deadline-fix-dl_server-behaviour.patch
new file mode 100644 (file)
index 0000000..99daacd
--- /dev/null
@@ -0,0 +1,153 @@
+From fa6e456ab9d98aabbc46a269473f281a87b336e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 23:11:17 +0200
+Subject: sched/deadline: Fix dl_server behaviour
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a3a70caf7906708bf9bbc80018752a6b36543808 upstream.
+
+John reported undesirable behaviour with the dl_server since commit:
+cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling").
+
+When starving fair tasks on purpose (starting spinning FIFO tasks),
+his fair workload, which often goes (briefly) idle, would delay fair
+invocations for a second, running one invocation per second was both
+unexpected and terribly slow.
+
+The reason this happens is that when dl_se->server_pick_task() returns
+NULL, indicating no runnable tasks, it would yield, pushing any later
+jobs out a whole period (1 second).
+
+Instead simply stop the server. This should restore behaviour in that
+a later wakeup (which restarts the server) will be able to continue
+running (subject to the CBS wakeup rules).
+
+Notably, this does not re-introduce the behaviour cccb45d7c4295 set
+out to solve, any start/stop cycle is naturally throttled by the timer
+period (no active cancel).
+
+Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
+Reported-by: John Stultz <jstultz@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: John Stultz <jstultz@google.com>
+Closes: https://lore.kernel.org/regressions/04657838-46d1-432d-95e1-eb73b930b032@mailbox.org
+Signed-off-by: Lukas Beckmann <lbckmnn@mailbox.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h   |  1 -
+ kernel/sched/deadline.c | 23 ++---------------------
+ kernel/sched/sched.h    | 33 +++++++++++++++++++++++++++++++--
+ 3 files changed, 33 insertions(+), 24 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 464d281aa2e49..f9ffe42cae171 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -674,7 +674,6 @@ struct sched_dl_entity {
+       unsigned int                    dl_defer          : 1;
+       unsigned int                    dl_defer_armed    : 1;
+       unsigned int                    dl_defer_running  : 1;
+-      unsigned int                    dl_server_idle    : 1;
+       /*
+        * Bandwidth enforcement timer. Each -deadline task has its
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 609783d7de290..a6c699e43111d 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1621,10 +1621,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+ {
+       /* 0 runtime = fair server disabled */
+-      if (dl_se->dl_runtime) {
+-              dl_se->dl_server_idle = 0;
++      if (dl_se->dl_runtime)
+               update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+-      }
+ }
+ /*
+@@ -1868,20 +1866,6 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+       dl_se->dl_server_active = 0;
+ }
+-static bool dl_server_stopped(struct sched_dl_entity *dl_se)
+-{
+-      if (!dl_se->dl_server_active)
+-              return true;
+-
+-      if (dl_se->dl_server_idle) {
+-              dl_server_stop(dl_se);
+-              return true;
+-      }
+-
+-      dl_se->dl_server_idle = 1;
+-      return false;
+-}
+-
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+                   dl_server_pick_f pick_task)
+ {
+@@ -2637,10 +2621,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+       if (dl_server(dl_se)) {
+               p = dl_se->server_pick_task(dl_se);
+               if (!p) {
+-                      if (!dl_server_stopped(dl_se)) {
+-                              dl_se->dl_yielded = 1;
+-                              update_curr_dl_se(rq, dl_se, 0);
+-                      }
++                      dl_server_stop(dl_se);
+                       goto again;
+               }
+               rq->dl_server = dl_se;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 9391ff62cdaaa..7956abeb9154e 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -377,10 +377,39 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
+  *   dl_server_update() -- called from update_curr_common(), propagates runtime
+  *                         to the server.
+  *
+- *   dl_server_start()
+- *   dl_server_stop()  -- start/stop the server when it has (no) tasks.
++ *   dl_server_start() -- start the server when it has tasks; it will stop
++ *                      automatically when there are no more tasks, per
++ *                      dl_se::server_pick() returning NULL.
++ *
++ *   dl_server_stop() -- (force) stop the server; use when updating
++ *                       parameters.
+  *
+  *   dl_server_init() -- initializes the server.
++ *
++ * When started the dl_server will (per dl_defer) schedule a timer for its
++ * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
++ * server will run at the very end of its period.
++ *
++ * This is done such that any runtime from the target class can be accounted
++ * against the server -- through dl_server_update() above -- such that when it
++ * becomes time to run, it might already be out of runtime and get deferred
++ * until the next period. In this case dl_server_timer() will alternate
++ * between defer and replenish but never actually enqueue the server.
++ *
++ * Only when the target class does not manage to exhaust the server's runtime
++ * (there's actually starvation in the given period), will the dl_server get on
++ * the runqueue. Once queued it will pick tasks from the target class and run
++ * them until either its runtime is exhausted, at which point it's back to
++ * dl_server_timer, or until there are no more tasks to run, at which point
++ * the dl_server stops itself.
++ *
++ * By stopping at this point the dl_server retains bandwidth, which, if a new
++ * task wakes up imminently (starting the server again), can be used --
++ * subject to CBS wakeup rules -- without having to wait for the next period.
++ *
++ * Additionally, because of the dl_defer behaviour the start/stop behaviour is
++ * naturally throttled to once per period, avoiding high context switch
++ * workloads from spamming the hrtimer program/cancel paths.
+  */
+ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
+ extern void dl_server_start(struct sched_dl_entity *dl_se);
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch b/queue-6.12/sched-deadline-fix-dl_server-getting-stuck.patch
new file mode 100644 (file)
index 0000000..4b4e855
--- /dev/null
@@ -0,0 +1,162 @@
+From cf7902eb8cf1b0096b2e1b6eceba972e7cc1c4bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 23:11:16 +0200
+Subject: sched/deadline: Fix dl_server getting stuck
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4ae8d9aa9f9dc7137ea5e564d79c5aa5af1bc45c upstream.
+
+John found it was easy to hit lockup warnings when running locktorture
+on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
+("sched/deadline: Less agressive dl_server handling").
+
+While debugging it seems there is a chance where we end up with the
+dl_server dequeued, with dl_se->dl_server_active. This causes
+dl_server_start() to return without enqueueing the dl_server, thus it
+fails to run when RT tasks starve the cpu.
+
+When this happens, dl_server_timer() catches the
+'!dl_se->server_has_tasks(dl_se)' case, which then calls
+replenish_dl_entity() and dl_server_stopped() and finally return
+HRTIMER_NO_RESTART.
+
+This ends in no new timer and also no enqueue, leaving the dl_server
+'dead', allowing starvation.
+
+What should have happened is for the bandwidth timer to start the
+zero-laxity timer, which in turn would enqueue the dl_server and cause
+dl_se->server_pick_task() to be called -- which will stop the
+dl_server if no fair tasks are observed for a whole period.
+
+IOW, it is totally irrelevant if there are fair tasks at the moment of
+bandwidth refresh.
+
+This removes all dl_se->server_has_tasks() users, so remove the whole
+thing.
+
+Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
+Reported-by: John Stultz <jstultz@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: John Stultz <jstultz@google.com>
+[ adjust renamed variable in fair_server_has_tasks (which this patch
+removes) ]
+Signed-off-by: Lukas Beckmann <lbckmnn@mailbox.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h   |  1 -
+ kernel/sched/deadline.c | 12 +-----------
+ kernel/sched/fair.c     |  7 +------
+ kernel/sched/sched.h    |  4 ----
+ 4 files changed, 2 insertions(+), 22 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 299a65a92d2e6..464d281aa2e49 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -701,7 +701,6 @@ struct sched_dl_entity {
+        * runnable task.
+        */
+       struct rq                       *rq;
+-      dl_server_has_tasks_f           server_has_tasks;
+       dl_server_pick_f                server_pick_task;
+ #ifdef CONFIG_RT_MUTEXES
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 6ff9055a69811..609783d7de290 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -916,7 +916,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
+        */
+       if (dl_se->dl_defer && !dl_se->dl_defer_running &&
+           dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
+-              if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
++              if (!is_dl_boosted(dl_se)) {
+                       /*
+                        * Set dl_se->dl_defer_armed and dl_throttled variables to
+@@ -1201,8 +1201,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+ /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
+-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
+-
+ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+ {
+       struct rq *rq = rq_of_dl_se(dl_se);
+@@ -1220,12 +1218,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
+               if (!dl_se->dl_runtime)
+                       return HRTIMER_NORESTART;
+-              if (!dl_se->server_has_tasks(dl_se)) {
+-                      replenish_dl_entity(dl_se);
+-                      dl_server_stopped(dl_se);
+-                      return HRTIMER_NORESTART;
+-              }
+-
+               if (dl_se->dl_defer_armed) {
+                       /*
+                        * First check if the server could consume runtime in background.
+@@ -1891,11 +1883,9 @@ static bool dl_server_stopped(struct sched_dl_entity *dl_se)
+ }
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+-                  dl_server_has_tasks_f has_tasks,
+                   dl_server_pick_f pick_task)
+ {
+       dl_se->rq = rq;
+-      dl_se->server_has_tasks = has_tasks;
+       dl_se->server_pick_task = pick_task;
+ }
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d26e078d0623f..f36512892adf9 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9058,11 +9058,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
+       return pick_next_task_fair(rq, prev, NULL);
+ }
+-static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+-{
+-      return !!dl_se->rq->cfs.nr_running;
+-}
+-
+ static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
+ {
+       return pick_task_fair(dl_se->rq);
+@@ -9074,7 +9069,7 @@ void fair_server_init(struct rq *rq)
+       init_dl_entity(dl_se);
+-      dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
++      dl_server_init(dl_se, rq, fair_server_pick_task);
+ }
+ /*
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index a09e2d25edd57..9391ff62cdaaa 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -371,9 +371,6 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
+  *
+  *   dl_se::rq -- runqueue we belong to.
+  *
+- *   dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
+- *                                server when it runs out of tasks to run.
+- *
+  *   dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
+  *                           returns NULL.
+  *
+@@ -389,7 +386,6 @@ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
+ extern void dl_server_start(struct sched_dl_entity *dl_se);
+ extern void dl_server_stop(struct sched_dl_entity *dl_se);
+ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+-                  dl_server_has_tasks_f has_tasks,
+                   dl_server_pick_f pick_task);
+ extern void dl_server_update_idle_time(struct rq *rq,
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-fix-dl_server_stopped.patch b/queue-6.12/sched-deadline-fix-dl_server_stopped.patch
new file mode 100644 (file)
index 0000000..114bcee
--- /dev/null
@@ -0,0 +1,39 @@
+From 125f84b14921cf6acf104ac26f3ce38448538fa2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 23:11:14 +0200
+Subject: sched/deadline: Fix dl_server_stopped()
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 4717432dfd99bbd015b6782adca216c6f9340038 upstream.
+
+Commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling")
+introduces dl_server_stopped(). But it is obvious that dl_server_stopped()
+should return true if dl_se->dl_server_active is 0.
+
+Fixes: cccb45d7c429 ("sched/deadline: Less agressive dl_server handling")
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20250809130419.1980742-1-chenhuacai@loongson.cn
+Signed-off-by: Lukas Beckmann <lbckmnn@mailbox.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/deadline.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 9c5fa95b345a5..6ff9055a69811 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1879,7 +1879,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+ static bool dl_server_stopped(struct sched_dl_entity *dl_se)
+ {
+       if (!dl_se->dl_server_active)
+-              return false;
++              return true;
+       if (dl_se->dl_server_idle) {
+               dl_server_stop(dl_se);
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch b/queue-6.12/sched-deadline-less-agressive-dl_server-handling.patch
new file mode 100644 (file)
index 0000000..770afbe
--- /dev/null
@@ -0,0 +1,165 @@
+From bf89db6d1042902e67f75e43ac5ce8105cf26c26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 23:11:13 +0200
+Subject: sched/deadline: Less agressive dl_server handling
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit cccb45d7c4295bbfeba616582d0249f2d21e6df5 upstream.
+
+Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default
+bandwidth control") caused a significant dip in his favourite
+benchmark of the day. Simply disabling dl_server cured things.
+
+His workload hammers the 0->1, 1->0 transitions, and the
+dl_server_{start,stop}() overhead kills it -- fairly obviously a bad
+idea in hindsight and all that.
+
+Change things around to only disable the dl_server when there has not
+been a fair task around for a whole period. Since the default period
+is 1 second, this ensures the benchmark never trips this, overhead
+gone.
+
+Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server")
+Reported-by: Chris Mason <clm@meta.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
+Acked-by: Juri Lelli <juri.lelli@redhat.com>
+Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org
+[ adjust context for renamed/removed variable names ]
+Signed-off-by: Lukas Beckmann <lbckmnn@mailbox.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h   |  1 +
+ kernel/sched/deadline.c | 25 ++++++++++++++++++++++---
+ kernel/sched/fair.c     |  9 ---------
+ 3 files changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 2e4c437c7c902..299a65a92d2e6 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -674,6 +674,7 @@ struct sched_dl_entity {
+       unsigned int                    dl_defer          : 1;
+       unsigned int                    dl_defer_armed    : 1;
+       unsigned int                    dl_defer_running  : 1;
++      unsigned int                    dl_server_idle    : 1;
+       /*
+        * Bandwidth enforcement timer. Each -deadline task has its
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 1ef891f8e3f2f..9c5fa95b345a5 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1201,6 +1201,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+ /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
++static bool dl_server_stopped(struct sched_dl_entity *dl_se);
++
+ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+ {
+       struct rq *rq = rq_of_dl_se(dl_se);
+@@ -1220,6 +1222,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
+               if (!dl_se->server_has_tasks(dl_se)) {
+                       replenish_dl_entity(dl_se);
++                      dl_server_stopped(dl_se);
+                       return HRTIMER_NORESTART;
+               }
+@@ -1626,8 +1629,10 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+ {
+       /* 0 runtime = fair server disabled */
+-      if (dl_se->dl_runtime)
++      if (dl_se->dl_runtime) {
++              dl_se->dl_server_idle = 0;
+               update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
++      }
+ }
+ /*
+@@ -1850,7 +1855,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+               setup_new_dl_entity(dl_se);
+       }
+-      if (!dl_se->dl_runtime)
++      if (!dl_se->dl_runtime || dl_se->dl_server_active)
+               return;
+       dl_se->dl_server_active = 1;
+@@ -1871,6 +1876,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+       dl_se->dl_server_active = 0;
+ }
++static bool dl_server_stopped(struct sched_dl_entity *dl_se)
++{
++      if (!dl_se->dl_server_active)
++              return false;
++
++      if (dl_se->dl_server_idle) {
++              dl_server_stop(dl_se);
++              return true;
++      }
++
++      dl_se->dl_server_idle = 1;
++      return false;
++}
++
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+                   dl_server_has_tasks_f has_tasks,
+                   dl_server_pick_f pick_task)
+@@ -2628,7 +2647,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+       if (dl_server(dl_se)) {
+               p = dl_se->server_pick_task(dl_se);
+               if (!p) {
+-                      if (dl_server_active(dl_se)) {
++                      if (!dl_server_stopped(dl_se)) {
+                               dl_se->dl_yielded = 1;
+                               update_curr_dl_se(rq, dl_se, 0);
+                       }
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a0a47e50b71ca..d26e078d0623f 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5972,7 +5972,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+       struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+       struct sched_entity *se;
+       long queued_delta, runnable_delta, idle_task_delta, delayed_delta, dequeue = 1;
+-      long rq_h_nr_queued = rq->cfs.h_nr_queued;
+       raw_spin_lock(&cfs_b->lock);
+       /* This will start the period timer if necessary */
+@@ -6059,10 +6058,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+       /* At this point se is NULL and we are at root level*/
+       sub_nr_running(rq, queued_delta);
+-
+-      /* Stop the fair server if throttling resulted in no runnable tasks */
+-      if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+-              dl_server_stop(&rq->fair_server);
+ done:
+       /*
+        * Note: distribution will already see us throttled via the
+@@ -7162,7 +7157,6 @@ static void set_next_buddy(struct sched_entity *se);
+ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ {
+       bool was_sched_idle = sched_idle_rq(rq);
+-      int rq_h_nr_queued = rq->cfs.h_nr_queued;
+       bool task_sleep = flags & DEQUEUE_SLEEP;
+       bool task_delayed = flags & DEQUEUE_DELAYED;
+       struct task_struct *p = NULL;
+@@ -7251,9 +7245,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+       sub_nr_running(rq, h_nr_queued);
+-      if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+-              dl_server_stop(&rq->fair_server);
+-
+       /* balance early to pull high priority tasks */
+       if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+               rq->next_balance = jiffies;
+-- 
+2.53.0
+
diff --git a/queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch b/queue-6.12/sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch
new file mode 100644 (file)
index 0000000..0aa5c8e
--- /dev/null
@@ -0,0 +1,84 @@
+From 1d75df038698b9fa4534cdcdbd98588ea50c6c1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Oct 2025 00:17:27 +0530
+Subject: sched/deadline: Stop dl_server before CPU goes offline
+
+From: Peter Zijlstra (Intel) <peterz@infradead.org>
+
+[ Upstream commit ee6e44dfe6e50b4a5df853d933a96bdff5309e6e ]
+
+IBM CI tool reported kernel warning[1] when running a CPU removal
+operation through drmgr[2], i.e. "drmgr -c cpu -r -q 1"
+
+WARNING: CPU: 0 PID: 0 at kernel/sched/cpudeadline.c:219 cpudl_set+0x58/0x170
+NIP [c0000000002b6ed8] cpudl_set+0x58/0x170
+LR [c0000000002b7cb8] dl_server_timer+0x168/0x2a0
+Call Trace:
+[c000000002c2f8c0] init_stack+0x78c0/0x8000 (unreliable)
+[c0000000002b7cb8] dl_server_timer+0x168/0x2a0
+[c00000000034df84] __hrtimer_run_queues+0x1a4/0x390
+[c00000000034f624] hrtimer_interrupt+0x124/0x300
+[c00000000002a230] timer_interrupt+0x140/0x320
+
+Git bisects to: commit 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck")
+
+This happens since:
+- dl_server hrtimer gets enqueued close to cpu offline, when
+  kthread_park enqueues a fair task.
+- CPU goes offline and drmgr removes it from cpu_present_mask.
+- hrtimer fires and warning is hit.
+
+Fix it by stopping the dl_server before CPU is marked dead.
+
+[1]: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com/
+[2]: https://github.com/ibm-power-utilities/powerpc-utils/tree/next/src/drmgr
+
+[sshegde: wrote the changelog and tested it]
+Fixes: 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck")
+Closes: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
+Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c     | 2 ++
+ kernel/sched/deadline.c | 3 +++
+ 2 files changed, 5 insertions(+)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 9b238c9c71c67..1b1ddd24cb227 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8319,10 +8319,12 @@ int sched_cpu_dying(unsigned int cpu)
+       sched_tick_stop(cpu);
+       rq_lock_irqsave(rq, &rf);
++      update_rq_clock(rq);
+       if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) {
+               WARN(true, "Dying CPU not properly vacated!");
+               dump_rq_tasks(rq, KERN_WARNING);
+       }
++      dl_server_stop(&rq->fair_server);
+       rq_unlock_irqrestore(rq, &rf);
+       calc_load_migrate(rq);
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index a6c699e43111d..cb8eff0ebd228 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1848,6 +1848,9 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+       if (!dl_se->dl_runtime || dl_se->dl_server_active)
+               return;
++      if (WARN_ON_ONCE(!cpu_online(cpu_of(rq))))
++              return;
++
+       dl_se->dl_server_active = 1;
+       enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+       if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+-- 
+2.53.0
+
index b7ea8babb3c5b37213850ef44573d13a839098ca..8f81d2a62d3b0e819515ddb806f55c466b14db16 100644 (file)
@@ -11,3 +11,11 @@ revert-perf-python-add-parse_events-function.patch
 revert-perf-tool_pmu-factor-tool-events-into-their-o.patch
 bridge-mrp-reject-zero-test-interval-to-avoid-oom-pa.patch
 spi-spi-dw-dma-fix-print-error-log-when-wait-finish-.patch
+revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
+sched-deadline-less-agressive-dl_server-handling.patch
+sched-deadline-fix-dl_server_stopped.patch
+sched-deadline-always-stop-dl-server-before-changing.patch
+sched-deadline-fix-dl_server-getting-stuck.patch
+sched-deadline-fix-dl_server-behaviour.patch
+sched-deadline-stop-dl_server-before-cpu-goes-offlin.patch
+ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch
diff --git a/queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-6.18/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch
new file mode 100644 (file)
index 0000000..e4466d3
--- /dev/null
@@ -0,0 +1,260 @@
+From 9c19b2a43886dcf9c15e41d42b4558e71620556a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Apr 2026 23:08:56 +0900
+Subject: ksmbd: close durable scavenger races against m_fp_list lookups
+
+From: DaeMyung Kang <charsyam@gmail.com>
+
+[ Upstream commit bf736184d063da1a552ffeff0481813599a182cc ]
+
+ksmbd_durable_scavenger() has two related races against any walker
+that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode()
+(used by ksmbd_vfs_rename) and the share-mode checks in
+fs/smb/server/smb_common.c.
+
+(1) fp->node list-head reuse.  Durable-preserved handles can remain
+linked on f_ci->m_fp_list after session teardown so share-mode checks
+still see them while the handle is reconnectable.  The scavenger
+collected expired handles by adding fp->node to a local
+scavenger_list after removing them from the global durable idr.
+Because fp->node is the same list_head used by m_fp_list,
+list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links
+and corrupts both lists.  CONFIG_DEBUG_LIST can report this on the
+share-mode walk path.
+
+(2) Refcount race against m_fp_list walkers.  The scavenger qualifies
+an expired durable handle with atomic_read(&fp->refcount) > 1 and
+fp->conn under global_ft.lock, removes fp from global_ft, then drops
+global_ft.lock before unlinking fp from m_fp_list and freeing it.
+During that gap fp is still linked on m_fp_list with f_state ==
+FP_INITED.  ksmbd_lookup_fd_inode() under m_lock read calls
+ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and
+takes a live reference; the scavenger then unlinks and frees fp
+while the holder owns a reference, leading to UAF on the holder's
+subsequent ksmbd_fd_put() and on any field reads performed by a
+concurrent share-mode walker that iterates m_fp_list without taking
+ksmbd_fp_get() (smb_check_perm_dleases-like paths).
+
+Fix both:
+
+  * Stop reusing fp->node as a scavenger-private list node.  Remove
+    one expired handle from global_ft under global_ft.lock, take an
+    explicit transient reference, drop the lock, unlink fp->node
+    from m_fp_list under f_ci->m_lock, then drop both the durable
+    lifetime and transient references with atomic_sub_and_test(2,
+    &fp->refcount).  If the scavenger is the last putter the close
+    runs there; otherwise an in-flight holder that already raced
+    through the m_fp_list lookup owns the final close via its
+    ksmbd_fd_put() path.  The one-at-a-time disposal can rescan the
+    durable idr when multiple handles expire in the same pass, but
+    durable scavenging is a background expiration path and the final
+    full scan recomputes min_timeout before the next wait.
+
+  * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right
+    after idr_remove(), so a delayed final close from a holder that
+    snatched fp does not re-issue idr_remove() on a persistent id
+    that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have
+    already handed out to a brand-new durable handle.
+
+  * Bypass the per-conn open_files_count decrement in
+    __put_fd_final() when fp is detached from any session table
+    (fp->conn cleared by session_fd_check() at durable preserve --
+    paired with the volatile_id clear at unpublish, so checking
+    fp->conn alone is sufficient).  The walker that owns the final
+    close runs from an unrelated work->conn whose
+    stats.open_files_count never tracked this durable fp; without
+    this guard the holder would underflow that unrelated counter.
+
+The two races are folded into one patch because patch (1) alone
+cleans up the corrupted list but leaves a deterministic UAF window
+for m_fp_list walkers that the transient-reference and
+persistent_id discipline in (2) close; bisecting onto an
+intermediate state would land on a UAF that pre-patch chaos merely
+made less reproducible.
+
+Validation:
+  * CONFIG_DEBUG_LIST coverage for the list_head reuse path.
+  * KASAN-enabled direct SMB2 durable-handle coverage that exercised
+    ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode()
+    returns while durable handles expired under concurrent rename
+    lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING
+    reports.
+  * checkpatch --strict
+  * make -j$(nproc) M=fs/smb/server
+
+Fixes: d484d621d40f ("ksmbd: add durable scavenger timer")
+Signed-off-by: DaeMyung Kang <charsyam@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/vfs_cache.c | 104 ++++++++++++++++++++++++++++----------
+ 1 file changed, 77 insertions(+), 27 deletions(-)
+
+diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
+index d29cc1d01bd2c..a8fed467e9b69 100644
+--- a/fs/smb/server/vfs_cache.c
++++ b/fs/smb/server/vfs_cache.c
+@@ -325,6 +325,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+               return;
+       idr_remove(global_ft.idr, fp->persistent_id);
++      /*
++       * Clear persistent_id so a later __ksmbd_close_fd() that runs from a
++       * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode()
++       * walker held the final reference) does not re-issue idr_remove() on
++       * an id that idr_alloc_cyclic() may have already handed out to a new
++       * durable handle.
++       */
++      fp->persistent_id = KSMBD_NO_FID;
+ }
+ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+@@ -417,6 +425,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft,
+ static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
+ {
++      /*
++       * Detached durable fp -- session_fd_check() cleared fp->conn at
++       * preserve, so this fp is no longer tracked by any conn's
++       * stats.open_files_count.  This happens when
++       * ksmbd_scavenger_dispose_dh() hands the final close off to an
++       * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn
++       * is unrelated to the conn that originally opened the handle; close
++       * via the NULL-ft path so we do not underflow that unrelated
++       * counter.
++       */
++      if (!fp->conn) {
++              __ksmbd_close_fd(NULL, fp);
++              return;
++      }
+       __ksmbd_close_fd(&work->sess->file_table, fp);
+       atomic_dec(&work->conn->stats.open_files_count);
+ }
+@@ -788,24 +810,37 @@ static bool ksmbd_durable_scavenger_alive(void)
+       return true;
+ }
+-static void ksmbd_scavenger_dispose_dh(struct list_head *head)
++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp)
+ {
+-      while (!list_empty(head)) {
+-              struct ksmbd_file *fp;
++      /*
++       * Durable-preserved fp can remain linked on f_ci->m_fp_list for
++       * share-mode checks.  Unlink it before final close; fp->node is not
++       * available as a scavenger-private list node because re-adding it to
++       * another list corrupts m_fp_list.
++       */
++      down_write(&fp->f_ci->m_lock);
++      list_del_init(&fp->node);
++      up_write(&fp->f_ci->m_lock);
+-              fp = list_first_entry(head, struct ksmbd_file, node);
+-              list_del_init(&fp->node);
++      /*
++       * Drop both the durable lifetime reference and the transient reference
++       * taken by the scavenger under global_ft.lock.  If a concurrent
++       * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp
++       * before the unlink above, that holder owns the final close via
++       * ksmbd_fd_put() -> __ksmbd_close_fd().  Otherwise the scavenger is
++       * the last putter and finalises fp here.
++       */
++      if (atomic_sub_and_test(2, &fp->refcount))
+               __ksmbd_close_fd(NULL, fp);
+-      }
+ }
+ static int ksmbd_durable_scavenger(void *dummy)
+ {
+       struct ksmbd_file *fp = NULL;
++      struct ksmbd_file *expired_fp;
+       unsigned int id;
+       unsigned int min_timeout = 1;
+       bool found_fp_timeout;
+-      LIST_HEAD(scavenger_list);
+       unsigned long remaining_jiffies;
+       __module_get(THIS_MODULE);
+@@ -815,8 +850,6 @@ static int ksmbd_durable_scavenger(void *dummy)
+               if (try_to_freeze())
+                       continue;
+-              found_fp_timeout = false;
+-
+               remaining_jiffies = wait_event_timeout(dh_wq,
+                                  ksmbd_durable_scavenger_alive() == false,
+                                  __msecs_to_jiffies(min_timeout));
+@@ -825,23 +858,39 @@ static int ksmbd_durable_scavenger(void *dummy)
+               else
+                       min_timeout = DURABLE_HANDLE_MAX_TIMEOUT;
+-              write_lock(&global_ft.lock);
+-              idr_for_each_entry(global_ft.idr, fp, id) {
+-                      if (!fp->durable_timeout)
+-                              continue;
+-
+-                      if (atomic_read(&fp->refcount) > 1 ||
+-                          fp->conn)
+-                              continue;
+-
+-                      found_fp_timeout = true;
+-                      if (fp->durable_scavenger_timeout <=
+-                          jiffies_to_msecs(jiffies)) {
+-                              __ksmbd_remove_durable_fd(fp);
+-                              list_add(&fp->node, &scavenger_list);
+-                      } else {
++              do {
++                      expired_fp = NULL;
++                      found_fp_timeout = false;
++
++                      write_lock(&global_ft.lock);
++                      idr_for_each_entry(global_ft.idr, fp, id) {
+                               unsigned long durable_timeout;
++                              if (!fp->durable_timeout)
++                                      continue;
++
++                              if (atomic_read(&fp->refcount) > 1 ||
++                                  fp->conn)
++                                      continue;
++
++                              found_fp_timeout = true;
++                              if (fp->durable_scavenger_timeout <=
++                                  jiffies_to_msecs(jiffies)) {
++                                      __ksmbd_remove_durable_fd(fp);
++                                      /*
++                                       * Take a transient reference so fp
++                                       * cannot be freed by an in-flight
++                                       * ksmbd_lookup_fd_inode() that found
++                                       * it through f_ci->m_fp_list while we
++                                       * drop global_ft.lock and reach the
++                                       * m_fp_list unlink in
++                                       * ksmbd_scavenger_dispose_dh().
++                                       */
++                                      atomic_inc(&fp->refcount);
++                                      expired_fp = fp;
++                                      break;
++                              }
++
+                               durable_timeout =
+                                       fp->durable_scavenger_timeout -
+                                               jiffies_to_msecs(jiffies);
+@@ -849,10 +898,11 @@ static int ksmbd_durable_scavenger(void *dummy)
+                               if (min_timeout > durable_timeout)
+                                       min_timeout = durable_timeout;
+                       }
+-              }
+-              write_unlock(&global_ft.lock);
++                      write_unlock(&global_ft.lock);
+-              ksmbd_scavenger_dispose_dh(&scavenger_list);
++                      if (expired_fp)
++                              ksmbd_scavenger_dispose_dh(expired_fp);
++              } while (expired_fp);
+               if (found_fp_timeout == false)
+                       break;
+-- 
+2.53.0
+
index d35c0223a6253899327a275b0223958c3debd7c9..1a26a0def33e273c9d499f7bce7084dfba69ce38 100644 (file)
@@ -7,3 +7,4 @@ sched-employ-sched_change-guards.patch
 sched-deadline-fix-missing-enqueue_replenish-during-.patch
 bridge-mrp-reject-zero-test-interval-to-avoid-oom-pa.patch
 spi-spi-dw-dma-fix-print-error-log-when-wait-finish-.patch
+ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch
diff --git a/queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch b/queue-6.6/revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
new file mode 100644 (file)
index 0000000..0755872
--- /dev/null
@@ -0,0 +1,42 @@
+From c01dc464b7f0e8ef2a7b98a38a3a71ee049fc61c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 May 2026 20:45:55 -0400
+Subject: Revert "x86/vdso: Fix output operand size of RDPID"
+
+This reverts commit db82a94b0c109e7697a593f36783e7ef36073983.
+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/segment.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
+index 00cefbb59fa98..9d6411c659205 100644
+--- a/arch/x86/include/asm/segment.h
++++ b/arch/x86/include/asm/segment.h
+@@ -244,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
+ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+ {
+-      unsigned long p;
++      unsigned int p;
+       /*
+        * Load CPU and node number from the GDT.  LSL is faster than RDTSCP
+@@ -254,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+        *
+        * If RDPID is available, use it.
+        */
+-      alternative_io ("lsl %[seg],%k[p]",
+-                      "rdpid %[p]",
++      alternative_io ("lsl %[seg],%[p]",
++                      ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                       X86_FEATURE_RDPID,
+-                      [p] "=r" (p), [seg] "r" (__CPUNODE_SEG));
++                      [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+       if (cpu)
+               *cpu = (p & VDSO_CPUNODE_MASK);
+-- 
+2.53.0
+
index a6d241119c8b07fce08acaf80d0bad951b1d83f8..88e063ebc2248632e49144ab41b6542369f3975f 100644 (file)
@@ -7,3 +7,4 @@ driver-core-generalize-driver_override-in-struct-dev.patch
 driver-core-platform-use-generic-driver_override-inf.patch
 s390-debug-reject-zero-length-input-before-trimming-.patch
 wifi-mac80211-check-tdls-flag-in-ieee80211_tdls_oper.patch
+revert-x86-vdso-fix-output-operand-size-of-rdpid.patch
diff --git a/queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch b/queue-7.0/ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch
new file mode 100644 (file)
index 0000000..456cb0c
--- /dev/null
@@ -0,0 +1,259 @@
+From 5163e728f460c9baa126d1f0bf14beefd3a9fb9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Apr 2026 23:08:56 +0900
+Subject: ksmbd: close durable scavenger races against m_fp_list lookups
+
+From: DaeMyung Kang <charsyam@gmail.com>
+
+[ Upstream commit bf736184d063da1a552ffeff0481813599a182cc ]
+
+ksmbd_durable_scavenger() has two related races against any walker
+that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode()
+(used by ksmbd_vfs_rename) and the share-mode checks in
+fs/smb/server/smb_common.c.
+
+(1) fp->node list-head reuse.  Durable-preserved handles can remain
+linked on f_ci->m_fp_list after session teardown so share-mode checks
+still see them while the handle is reconnectable.  The scavenger
+collected expired handles by adding fp->node to a local
+scavenger_list after removing them from the global durable idr.
+Because fp->node is the same list_head used by m_fp_list,
+list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links
+and corrupts both lists.  CONFIG_DEBUG_LIST can report this on the
+share-mode walk path.
+
+(2) Refcount race against m_fp_list walkers.  The scavenger qualifies
+an expired durable handle with atomic_read(&fp->refcount) > 1 and
+fp->conn under global_ft.lock, removes fp from global_ft, then drops
+global_ft.lock before unlinking fp from m_fp_list and freeing it.
+During that gap fp is still linked on m_fp_list with f_state ==
+FP_INITED.  ksmbd_lookup_fd_inode() under m_lock read calls
+ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and
+takes a live reference; the scavenger then unlinks and frees fp
+while the holder owns a reference, leading to UAF on the holder's
+subsequent ksmbd_fd_put() and on any field reads performed by a
+concurrent share-mode walker that iterates m_fp_list without taking
+ksmbd_fp_get() (smb_check_perm_dleases-like paths).
+
+Fix both races with the following three changes:
+
+  * Stop reusing fp->node as a scavenger-private list node.  Remove
+    one expired handle from global_ft under global_ft.lock, take an
+    explicit transient reference, drop the lock, unlink fp->node
+    from m_fp_list under f_ci->m_lock, then drop both the durable
+    lifetime and transient references with atomic_sub_and_test(2,
+    &fp->refcount).  If the scavenger is the last putter the close
+    runs there; otherwise an in-flight holder that already raced
+    through the m_fp_list lookup owns the final close via its
+    ksmbd_fd_put() path.  The one-at-a-time disposal can rescan the
+    durable idr when multiple handles expire in the same pass, but
+    durable scavenging is a background expiration path and the final
+    full scan recomputes min_timeout before the next wait.
+
+  * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right
+    after idr_remove(), so a delayed final close from a holder that
+    snatched fp does not re-issue idr_remove() on a persistent id
+    that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have
+    already handed out to a brand-new durable handle.
+
+  * Bypass the per-conn open_files_count decrement in
+    __put_fd_final() when fp is detached from any session table
+    (fp->conn cleared by session_fd_check() at durable preserve --
+    paired with the volatile_id clear at unpublish, so checking
+    fp->conn alone is sufficient).  The walker that owns the final
+    close runs from an unrelated work->conn whose
+    stats.open_files_count never tracked this durable fp; without
+    this guard the holder would underflow that unrelated counter.
+
+The two races are folded into one patch because fixing (1) alone
+cleans up the corrupted list but leaves a deterministic UAF window
+for m_fp_list walkers that the transient-reference and
+persistent_id discipline in (2) closes; bisecting onto an
+intermediate state would land on a UAF that pre-patch chaos merely
+made less reproducible.
+
+Validation:
+  * CONFIG_DEBUG_LIST coverage for the list_head reuse path.
+  * KASAN-enabled direct SMB2 durable-handle coverage that exercised
+    ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode()
+    returns while durable handles expired under concurrent rename
+    lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING
+    reports.
+  * checkpatch --strict
+  * make -j$(nproc) M=fs/smb/server
+
+Fixes: d484d621d40f ("ksmbd: add durable scavenger timer")
+Signed-off-by: DaeMyung Kang <charsyam@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/vfs_cache.c | 102 ++++++++++++++++++++++++++++----------
+ 1 file changed, 76 insertions(+), 26 deletions(-)
+
+diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
+index 3551f01a3fa03..60b7a2d60238a 100644
+--- a/fs/smb/server/vfs_cache.c
++++ b/fs/smb/server/vfs_cache.c
+@@ -418,6 +418,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+               return;
+       idr_remove(global_ft.idr, fp->persistent_id);
++      /*
++       * Clear persistent_id so a later __ksmbd_close_fd() that runs from a
++       * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode()
++       * walker held the final reference) does not re-issue idr_remove() on
++       * an id that idr_alloc_cyclic() may have already handed out to a new
++       * durable handle.
++       */
++      fp->persistent_id = KSMBD_NO_FID;
+ }
+ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+@@ -510,6 +518,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft,
+ static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
+ {
++      /*
++       * Detached durable fp -- session_fd_check() cleared fp->conn at
++       * preserve, so this fp is no longer tracked by any conn's
++       * stats.open_files_count.  This happens when
++       * ksmbd_scavenger_dispose_dh() hands the final close off to an
++       * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn
++       * is unrelated to the conn that originally opened the handle; close
++       * via the NULL-ft path so we do not underflow that unrelated
++       * counter.
++       */
++      if (!fp->conn) {
++              __ksmbd_close_fd(NULL, fp);
++              return;
++      }
+       __ksmbd_close_fd(&work->sess->file_table, fp);
+       atomic_dec(&work->conn->stats.open_files_count);
+ }
+@@ -881,24 +903,37 @@ static bool ksmbd_durable_scavenger_alive(void)
+       return true;
+ }
+-static void ksmbd_scavenger_dispose_dh(struct list_head *head)
++static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp)
+ {
+-      while (!list_empty(head)) {
+-              struct ksmbd_file *fp;
++      /*
++       * Durable-preserved fp can remain linked on f_ci->m_fp_list for
++       * share-mode checks.  Unlink it before final close; fp->node is not
++       * available as a scavenger-private list node because re-adding it to
++       * another list corrupts m_fp_list.
++       */
++      down_write(&fp->f_ci->m_lock);
++      list_del_init(&fp->node);
++      up_write(&fp->f_ci->m_lock);
+-              fp = list_first_entry(head, struct ksmbd_file, node);
+-              list_del_init(&fp->node);
++      /*
++       * Drop both the durable lifetime reference and the transient reference
++       * taken by the scavenger under global_ft.lock.  If a concurrent
++       * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp
++       * before the unlink above, that holder owns the final close via
++       * ksmbd_fd_put() -> __ksmbd_close_fd().  Otherwise the scavenger is
++       * the last putter and finalises fp here.
++       */
++      if (atomic_sub_and_test(2, &fp->refcount))
+               __ksmbd_close_fd(NULL, fp);
+-      }
+ }
+ static int ksmbd_durable_scavenger(void *dummy)
+ {
+       struct ksmbd_file *fp = NULL;
++      struct ksmbd_file *expired_fp;
+       unsigned int id;
+       unsigned int min_timeout = 1;
+       bool found_fp_timeout;
+-      LIST_HEAD(scavenger_list);
+       unsigned long remaining_jiffies;
+       __module_get(THIS_MODULE);
+@@ -908,8 +943,6 @@ static int ksmbd_durable_scavenger(void *dummy)
+               if (try_to_freeze())
+                       continue;
+-              found_fp_timeout = false;
+-
+               remaining_jiffies = wait_event_timeout(dh_wq,
+                                  ksmbd_durable_scavenger_alive() == false,
+                                  __msecs_to_jiffies(min_timeout));
+@@ -918,23 +951,39 @@ static int ksmbd_durable_scavenger(void *dummy)
+               else
+                       min_timeout = DURABLE_HANDLE_MAX_TIMEOUT;
+-              write_lock(&global_ft.lock);
+-              idr_for_each_entry(global_ft.idr, fp, id) {
+-                      if (!fp->durable_timeout)
+-                              continue;
++              do {
++                      expired_fp = NULL;
++                      found_fp_timeout = false;
+-                      if (atomic_read(&fp->refcount) > 1 ||
+-                          fp->conn)
+-                              continue;
+-
+-                      found_fp_timeout = true;
+-                      if (fp->durable_scavenger_timeout <=
+-                          jiffies_to_msecs(jiffies)) {
+-                              __ksmbd_remove_durable_fd(fp);
+-                              list_add(&fp->node, &scavenger_list);
+-                      } else {
++                      write_lock(&global_ft.lock);
++                      idr_for_each_entry(global_ft.idr, fp, id) {
+                               unsigned long durable_timeout;
++                              if (!fp->durable_timeout)
++                                      continue;
++
++                              if (atomic_read(&fp->refcount) > 1 ||
++                                  fp->conn)
++                                      continue;
++
++                              found_fp_timeout = true;
++                              if (fp->durable_scavenger_timeout <=
++                                  jiffies_to_msecs(jiffies)) {
++                                      __ksmbd_remove_durable_fd(fp);
++                                      /*
++                                       * Take a transient reference so fp
++                                       * cannot be freed by an in-flight
++                                       * ksmbd_lookup_fd_inode() that found
++                                       * it through f_ci->m_fp_list while we
++                                       * drop global_ft.lock and reach the
++                                       * m_fp_list unlink in
++                                       * ksmbd_scavenger_dispose_dh().
++                                       */
++                                      atomic_inc(&fp->refcount);
++                                      expired_fp = fp;
++                                      break;
++                              }
++
+                               durable_timeout =
+                                       fp->durable_scavenger_timeout -
+                                               jiffies_to_msecs(jiffies);
+@@ -942,10 +991,11 @@ static int ksmbd_durable_scavenger(void *dummy)
+                               if (min_timeout > durable_timeout)
+                                       min_timeout = durable_timeout;
+                       }
+-              }
+-              write_unlock(&global_ft.lock);
++                      write_unlock(&global_ft.lock);
+-              ksmbd_scavenger_dispose_dh(&scavenger_list);
++                      if (expired_fp)
++                              ksmbd_scavenger_dispose_dh(expired_fp);
++              } while (expired_fp);
+               if (found_fp_timeout == false)
+                       break;
+-- 
+2.53.0
+
index d7baa45de294e634977067c2381659147f1dc36a..c78f71d8a4f2eb5ae965bbd66946fc6b7119d10b 100644 (file)
@@ -1,2 +1,3 @@
 iommu-amd-fix-illegal-cap-mmio-access-in-iommu-debug.patch
 iommu-amd-remove-latent-out-of-bounds-access-in-iomm.patch
+ksmbd-close-durable-scavenger-races-against-m_fp_lis.patch