git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 7 Sep 2025 15:24:12 +0000 (17:24 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 7 Sep 2025 15:24:12 +0000 (17:24 +0200)
added patches:
mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch
mm-slub-refactor-free-debug-processing.patch
slub-reflow-___slab_alloc.patch

queue-6.1/mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch [new file with mode: 0644]
queue-6.1/mm-slub-refactor-free-debug-processing.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/slub-reflow-___slab_alloc.patch [new file with mode: 0644]

diff --git a/queue-6.1/mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch b/queue-6.1/mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch
new file mode 100644 (file)
index 0000000..eda873e
--- /dev/null
+++ b/queue-6.1/mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch
@@ -0,0 +1,168 @@
+From stable+bounces-178051-greg=kroah.com@vger.kernel.org Sun Sep  7 17:13:41 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  7 Sep 2025 11:13:27 -0400
+Subject: mm: slub: avoid wake up kswapd in set_track_prepare
+To: stable@vger.kernel.org
+Cc: yangshiguang <yangshiguang@xiaomi.com>, Vlastimil Babka <vbabka@suse.cz>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250907151327.641468-3-sashal@kernel.org>
+
+From: yangshiguang <yangshiguang@xiaomi.com>
+
+[ Upstream commit 850470a8413a8a78e772c4f6bd9fe81ec6bd5b0f ]
+
+set_track_prepare() can incur lock recursion.
+The issue is that it is called from hrtimer_start_range_ns while
+holding the per_cpu(hrtimer_bases)[n].lock; with
+CONFIG_DEBUG_OBJECTS_TIMERS enabled it may wake up kswapd from
+set_track_prepare and then try to take the same
+per_cpu(hrtimer_bases)[n].lock again.
+
+Avoid the deadlock caused by implicitly waking up kswapd by passing in
+allocation flags that, in the debug_objects_fill_pool() case, do not
+contain __GFP_KSWAPD_RECLAIM. Inside stack depot they are further
+processed by gfp_nested_mask().
+Since ___slab_alloc() runs with preemption disabled, we also mask out
+__GFP_DIRECT_RECLAIM from the flags there.
+
+The oops looks something like:
+
+BUG: spinlock recursion on CPU#3, swapper/3/0
+ lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
+Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
+Call trace:
+spin_bug+0x0
+_raw_spin_lock_irqsave+0x80
+hrtimer_try_to_cancel+0x94
+task_contending+0x10c
+enqueue_dl_entity+0x2a4
+dl_server_start+0x74
+enqueue_task_fair+0x568
+enqueue_task+0xac
+do_activate_task+0x14c
+ttwu_do_activate+0xcc
+try_to_wake_up+0x6c8
+default_wake_function+0x20
+autoremove_wake_function+0x1c
+__wake_up+0xac
+wakeup_kswapd+0x19c
+wake_all_kswapds+0x78
+__alloc_pages_slowpath+0x1ac
+__alloc_pages_noprof+0x298
+stack_depot_save_flags+0x6b0
+stack_depot_save+0x14
+set_track_prepare+0x5c
+___slab_alloc+0xccc
+__kmalloc_cache_noprof+0x470
+__set_page_owner+0x2bc
+post_alloc_hook[jt]+0x1b8
+prep_new_page+0x28
+get_page_from_freelist+0x1edc
+__alloc_pages_noprof+0x13c
+alloc_slab_page+0x244
+allocate_slab+0x7c
+___slab_alloc+0x8e8
+kmem_cache_alloc_noprof+0x450
+debug_objects_fill_pool+0x22c
+debug_object_activate+0x40
+enqueue_hrtimer[jt]+0xdc
+hrtimer_start_range_ns+0x5f8
+...
+
+Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
+Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
+Cc: stable@vger.kernel.org
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |   30 ++++++++++++++++++++----------
+ 1 file changed, 20 insertions(+), 10 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -726,19 +726,19 @@ static struct track *get_track(struct km
+ }
+ #ifdef CONFIG_STACKDEPOT
+-static noinline depot_stack_handle_t set_track_prepare(void)
++static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
+ {
+       depot_stack_handle_t handle;
+       unsigned long entries[TRACK_ADDRS_COUNT];
+       unsigned int nr_entries;
+       nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+-      handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
++      handle = stack_depot_save(entries, nr_entries, gfp_flags);
+       return handle;
+ }
+ #else
+-static inline depot_stack_handle_t set_track_prepare(void)
++static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
+ {
+       return 0;
+ }
+@@ -760,9 +760,9 @@ static void set_track_update(struct kmem
+ }
+ static __always_inline void set_track(struct kmem_cache *s, void *object,
+-                                    enum track_item alloc, unsigned long addr)
++                                    enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
+ {
+-      depot_stack_handle_t handle = set_track_prepare();
++      depot_stack_handle_t handle = set_track_prepare(gfp_flags);
+       set_track_update(s, object, alloc, addr, handle);
+ }
+@@ -1656,9 +1656,9 @@ static inline bool free_debug_processing
+ static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
+ static inline int check_object(struct kmem_cache *s, struct slab *slab,
+                       void *object, u8 val) { return 1; }
+-static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
++static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; }
+ static inline void set_track(struct kmem_cache *s, void *object,
+-                           enum track_item alloc, unsigned long addr) {}
++                           enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {}
+ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
+                                       struct slab *slab) {}
+ static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
+@@ -3135,9 +3135,14 @@ new_objects:
+                        * For debug caches here we had to go through
+                        * alloc_single_from_partial() so just store the
+                        * tracking info and return the object.
++                       *
++                       * Due to disabled preemption we need to disallow
++                       * blocking. The flags are further adjusted by
++                       * gfp_nested_mask() in stack_depot itself.
+                        */
+                       if (s->flags & SLAB_STORE_USER)
+-                              set_track(s, freelist, TRACK_ALLOC, addr);
++                              set_track(s, freelist, TRACK_ALLOC, addr,
++                                        gfpflags & ~(__GFP_DIRECT_RECLAIM));
+                       return freelist;
+               }
+@@ -3163,7 +3168,8 @@ new_objects:
+                       goto new_objects;
+               if (s->flags & SLAB_STORE_USER)
+-                      set_track(s, freelist, TRACK_ALLOC, addr);
++                      set_track(s, freelist, TRACK_ALLOC, addr,
++                                gfpflags & ~(__GFP_DIRECT_RECLAIM));
+               return freelist;
+       }
+@@ -3414,8 +3420,12 @@ static noinline void free_to_partial_lis
+       unsigned long flags;
+       depot_stack_handle_t handle = 0;
++      /*
++       * We cannot use GFP_NOWAIT as there are callsites where waking up
++       * kswapd could deadlock
++       */
+       if (s->flags & SLAB_STORE_USER)
+-              handle = set_track_prepare();
++              handle = set_track_prepare(__GFP_NOWARN);
+       spin_lock_irqsave(&n->list_lock, flags);
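
Illustrative sketch (not part of the queued patch above): the point of the
change is that the gfp mask handed to stack depot must carry neither
reclaim bit, so the depot's internal page allocation can neither sleep nor
wake kswapd. The patch does this in two places (___slab_alloc() masks
__GFP_DIRECT_RECLAIM, while the debug_objects_fill_pool() callers already
pass flags without __GFP_KSWAPD_RECLAIM); the helper below is a
hypothetical, condensed combination of both, not the merged code. It
assumes <linux/stackdepot.h>, <linux/stacktrace.h> and <linux/gfp.h>, and
TRACK_ADDRS_COUNT is the mm/slub.c-private stack depth.

    static depot_stack_handle_t save_track_nonblocking(gfp_t gfp_flags)
    {
            unsigned long entries[TRACK_ADDRS_COUNT];
            unsigned int nr_entries;

            /*
             * Drop both reclaim bits; per the commit message, stack depot
             * further adjusts the flags via gfp_nested_mask().
             */
            gfp_flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM);

            nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
            return stack_depot_save(entries, nr_entries, gfp_flags);
    }

The same reasoning explains the bare __GFP_NOWARN in free_to_partial_list():
unlike GFP_NOWAIT it carries no __GFP_KSWAPD_RECLAIM bit, so the stack depot
allocation cannot wake kswapd from under n->list_lock.
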
diff --git a/queue-6.1/mm-slub-refactor-free-debug-processing.patch b/queue-6.1/mm-slub-refactor-free-debug-processing.patch
new file mode 100644 (file)
index 0000000..eabb747
--- /dev/null
+++ b/queue-6.1/mm-slub-refactor-free-debug-processing.patch
@@ -0,0 +1,276 @@
+From stable+bounces-178049-greg=kroah.com@vger.kernel.org Sun Sep  7 17:13:37 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  7 Sep 2025 11:13:25 -0400
+Subject: mm, slub: refactor free debug processing
+To: stable@vger.kernel.org
+Cc: Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@linux.ibm.com>, Christoph Lameter <cl@linux.com>, Hyeonggon Yoo <42.hyeyoo@gmail.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250907151327.641468-1-sashal@kernel.org>
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+[ Upstream commit fa9b88e459d710cadf3b01e8a64eda00cc91cdd6 ]
+
+Since commit c7323a5ad078 ("mm/slub: restrict sysfs validation to debug
+caches and make it safe"), caches with debugging enabled use the
+free_debug_processing() function to do both freeing checks and actual
+freeing to partial list under list_lock, bypassing the fast paths.
+
+We will want to use the same path for CONFIG_SLUB_TINY, but without the
+debugging checks, so refactor the code so that free_debug_processing()
+does only the checks, while the freeing is handled by a new function
+free_to_partial_list().
+
+For consistency, change the return type of alloc_debug_processing() from
+int to bool and correct the !SLUB_DEBUG variant to return true rather
+than false. This didn't matter until now, but will in the following
+changes.
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Mike Rapoport <rppt@linux.ibm.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Stable-dep-of: 850470a8413a ("mm: slub: avoid wake up kswapd in set_track_prepare")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 83 insertions(+), 71 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1368,7 +1368,7 @@ static inline int alloc_consistency_chec
+       return 1;
+ }
+-static noinline int alloc_debug_processing(struct kmem_cache *s,
++static noinline bool alloc_debug_processing(struct kmem_cache *s,
+                       struct slab *slab, void *object, int orig_size)
+ {
+       if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+@@ -1380,7 +1380,7 @@ static noinline int alloc_debug_processi
+       trace(s, slab, object, 1);
+       set_orig_size(s, object, orig_size);
+       init_object(s, object, SLUB_RED_ACTIVE);
+-      return 1;
++      return true;
+ bad:
+       if (folio_test_slab(slab_folio(slab))) {
+@@ -1393,7 +1393,7 @@ bad:
+               slab->inuse = slab->objects;
+               slab->freelist = NULL;
+       }
+-      return 0;
++      return false;
+ }
+ static inline int free_consistency_checks(struct kmem_cache *s,
+@@ -1646,17 +1646,17 @@ static inline void setup_object_debug(st
+ static inline
+ void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
+-static inline int alloc_debug_processing(struct kmem_cache *s,
+-      struct slab *slab, void *object, int orig_size) { return 0; }
++static inline bool alloc_debug_processing(struct kmem_cache *s,
++      struct slab *slab, void *object, int orig_size) { return true; }
+-static inline void free_debug_processing(
+-      struct kmem_cache *s, struct slab *slab,
+-      void *head, void *tail, int bulk_cnt,
+-      unsigned long addr) {}
++static inline bool free_debug_processing(struct kmem_cache *s,
++      struct slab *slab, void *head, void *tail, int *bulk_cnt,
++      unsigned long addr, depot_stack_handle_t handle) { return true; }
+ static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
+ static inline int check_object(struct kmem_cache *s, struct slab *slab,
+                       void *object, u8 val) { return 1; }
++static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
+ static inline void set_track(struct kmem_cache *s, void *object,
+                            enum track_item alloc, unsigned long addr) {}
+ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
+@@ -2833,38 +2833,28 @@ static inline unsigned long node_nr_objs
+ }
+ /* Supports checking bulk free of a constructed freelist */
+-static noinline void free_debug_processing(
+-      struct kmem_cache *s, struct slab *slab,
+-      void *head, void *tail, int bulk_cnt,
+-      unsigned long addr)
++static inline bool free_debug_processing(struct kmem_cache *s,
++      struct slab *slab, void *head, void *tail, int *bulk_cnt,
++      unsigned long addr, depot_stack_handle_t handle)
+ {
+-      struct kmem_cache_node *n = get_node(s, slab_nid(slab));
+-      struct slab *slab_free = NULL;
++      bool checks_ok = false;
+       void *object = head;
+       int cnt = 0;
+-      unsigned long flags;
+-      bool checks_ok = false;
+-      depot_stack_handle_t handle = 0;
+-
+-      if (s->flags & SLAB_STORE_USER)
+-              handle = set_track_prepare();
+-
+-      spin_lock_irqsave(&n->list_lock, flags);
+       if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+               if (!check_slab(s, slab))
+                       goto out;
+       }
+-      if (slab->inuse < bulk_cnt) {
++      if (slab->inuse < *bulk_cnt) {
+               slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
+-                       slab->inuse, bulk_cnt);
++                       slab->inuse, *bulk_cnt);
+               goto out;
+       }
+ next_object:
+-      if (++cnt > bulk_cnt)
++      if (++cnt > *bulk_cnt)
+               goto out_cnt;
+       if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+@@ -2886,57 +2876,18 @@ next_object:
+       checks_ok = true;
+ out_cnt:
+-      if (cnt != bulk_cnt)
++      if (cnt != *bulk_cnt) {
+               slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
+-                       bulk_cnt, cnt);
+-
+-out:
+-      if (checks_ok) {
+-              void *prior = slab->freelist;
+-
+-              /* Perform the actual freeing while we still hold the locks */
+-              slab->inuse -= cnt;
+-              set_freepointer(s, tail, prior);
+-              slab->freelist = head;
+-
+-              /*
+-               * If the slab is empty, and node's partial list is full,
+-               * it should be discarded anyway no matter it's on full or
+-               * partial list.
+-               */
+-              if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
+-                      slab_free = slab;
+-
+-              if (!prior) {
+-                      /* was on full list */
+-                      remove_full(s, n, slab);
+-                      if (!slab_free) {
+-                              add_partial(n, slab, DEACTIVATE_TO_TAIL);
+-                              stat(s, FREE_ADD_PARTIAL);
+-                      }
+-              } else if (slab_free) {
+-                      remove_partial(n, slab);
+-                      stat(s, FREE_REMOVE_PARTIAL);
+-              }
++                       *bulk_cnt, cnt);
++              *bulk_cnt = cnt;
+       }
+-      if (slab_free) {
+-              /*
+-               * Update the counters while still holding n->list_lock to
+-               * prevent spurious validation warnings
+-               */
+-              dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
+-      }
+-
+-      spin_unlock_irqrestore(&n->list_lock, flags);
++out:
+       if (!checks_ok)
+               slab_fix(s, "Object at 0x%p not freed", object);
+-      if (slab_free) {
+-              stat(s, FREE_SLAB);
+-              free_slab(s, slab_free);
+-      }
++      return checks_ok;
+ }
+ #endif /* CONFIG_SLUB_DEBUG */
+@@ -3453,6 +3404,67 @@ void *kmem_cache_alloc_node(struct kmem_
+ }
+ EXPORT_SYMBOL(kmem_cache_alloc_node);
++static noinline void free_to_partial_list(
++      struct kmem_cache *s, struct slab *slab,
++      void *head, void *tail, int bulk_cnt,
++      unsigned long addr)
++{
++      struct kmem_cache_node *n = get_node(s, slab_nid(slab));
++      struct slab *slab_free = NULL;
++      int cnt = bulk_cnt;
++      unsigned long flags;
++      depot_stack_handle_t handle = 0;
++
++      if (s->flags & SLAB_STORE_USER)
++              handle = set_track_prepare();
++
++      spin_lock_irqsave(&n->list_lock, flags);
++
++      if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
++              void *prior = slab->freelist;
++
++              /* Perform the actual freeing while we still hold the locks */
++              slab->inuse -= cnt;
++              set_freepointer(s, tail, prior);
++              slab->freelist = head;
++
++              /*
++               * If the slab is empty, and node's partial list is full,
++               * it should be discarded anyway no matter it's on full or
++               * partial list.
++               */
++              if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
++                      slab_free = slab;
++
++              if (!prior) {
++                      /* was on full list */
++                      remove_full(s, n, slab);
++                      if (!slab_free) {
++                              add_partial(n, slab, DEACTIVATE_TO_TAIL);
++                              stat(s, FREE_ADD_PARTIAL);
++                      }
++              } else if (slab_free) {
++                      remove_partial(n, slab);
++                      stat(s, FREE_REMOVE_PARTIAL);
++              }
++      }
++
++      if (slab_free) {
++              /*
++               * Update the counters while still holding n->list_lock to
++               * prevent spurious validation warnings
++               */
++              dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
++      }
++
++      spin_unlock_irqrestore(&n->list_lock, flags);
++
++      if (slab_free) {
++              stat(s, FREE_SLAB);
++              free_slab(s, slab_free);
++      }
++}
++
+ /*
+  * Slow path handling. This may still be called frequently since objects
+  * have a longer lifetime than the cpu slabs in most processing loads.
+@@ -3479,7 +3491,7 @@ static void __slab_free(struct kmem_cach
+               return;
+       if (kmem_cache_debug(s)) {
+-              free_debug_processing(s, slab, head, tail, cnt, addr);
++              free_to_partial_list(s, slab, head, tail, cnt, addr);
+               return;
+       }
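
Condensed sketch of the split introduced by the patch above (for
orientation only, not a drop-in for the mm/slub.c code): the reworked
free_debug_processing() only validates under n->list_lock and reports the
result as a bool, while the new free_to_partial_list() owns the locking and
the actual freelist splice. Full/partial list management, statistics and
the empty-slab discard path are left out of this sketch.

    static void free_to_partial_list_sketch(struct kmem_cache *s, struct slab *slab,
                                            void *head, void *tail, int bulk_cnt,
                                            unsigned long addr)
    {
            struct kmem_cache_node *n = get_node(s, slab_nid(slab));
            depot_stack_handle_t handle = 0;
            int cnt = bulk_cnt;
            unsigned long flags;

            if (s->flags & SLAB_STORE_USER)
                    handle = set_track_prepare();

            spin_lock_irqsave(&n->list_lock, flags);

            /* Checks only; cnt may be clamped to the number of objects verified. */
            if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
                    /* Checks passed: splice [head..tail] onto the freelist while locked. */
                    slab->inuse -= cnt;
                    set_freepointer(s, tail, slab->freelist);
                    slab->freelist = head;
            }

            spin_unlock_irqrestore(&n->list_lock, flags);
    }
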
diff --git a/queue-6.1/series b/queue-6.1/series
index 6688d2bf907290d567ce541968d09d0008abe004..c6dc63fab443969b953dfce08a072fb2e20c781f 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -94,3 +94,6 @@ drm-amdgpu-replace-drm_-with-dev_-in-amdgpu_psp.c.patch
 drm-amd-amdgpu-fix-missing-error-return-on-kzalloc-f.patch
 tools-gpio-rm-.-.cmd-on-make-clean.patch
 tools-gpio-remove-the-include-directory-on-make-clea.patch
+mm-slub-refactor-free-debug-processing.patch
+slub-reflow-___slab_alloc.patch
+mm-slub-avoid-wake-up-kswapd-in-set_track_prepare.patch
diff --git a/queue-6.1/slub-reflow-___slab_alloc.patch b/queue-6.1/slub-reflow-___slab_alloc.patch
new file mode 100644 (file)
index 0000000..cce0954
--- /dev/null
+++ b/queue-6.1/slub-reflow-___slab_alloc.patch
@@ -0,0 +1,80 @@
+From stable+bounces-178050-greg=kroah.com@vger.kernel.org Sun Sep  7 17:13:41 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  7 Sep 2025 11:13:26 -0400
+Subject: slub: Reflow ___slab_alloc()
+To: stable@vger.kernel.org
+Cc: Chengming Zhou <zhouchengming@bytedance.com>, Vlastimil Babka <vbabka@suse.cz>, Hyeonggon Yoo <42.hyeyoo@gmail.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250907151327.641468-2-sashal@kernel.org>
+
+From: Chengming Zhou <zhouchengming@bytedance.com>
+
+[ Upstream commit 24c6a097b5a270e05c6e99a99da66b91be81fd7d ]
+
+The get_partial() interface used in ___slab_alloc() may return a single
+object in the "kmem_cache_debug(s)" case, in which case we just return
+the "freelist" object.
+
+Move this handling up to prepare for later changes.
+
+The "pfmemalloc_match()" part is also not needed for a node partial
+slab, since we already check this in get_partial_node().
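
A condensed sketch of the reflowed path (for orientation only; not the
complete ___slab_alloc(), and the retry_load_slab label is the one already
present in mm/slub.c):

    freelist = get_partial(s, node, &pc);
    if (freelist) {
            if (kmem_cache_debug(s)) {
                    /*
                     * Debug caches came through alloc_single_from_partial(),
                     * so only the tracking info is stored before returning.
                     */
                    if (s->flags & SLAB_STORE_USER)
                            set_track(s, freelist, TRACK_ALLOC, addr);
                    return freelist;
            }
            /*
             * Non-debug caches go straight to loading the freelist; no
             * pfmemalloc_match() test is needed here because
             * get_partial_node() already filtered on it.
             */
            goto retry_load_slab;
    }
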
+
+Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Stable-dep-of: 850470a8413a ("mm: slub: avoid wake up kswapd in set_track_prepare")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |   31 +++++++++++++++----------------
+ 1 file changed, 15 insertions(+), 16 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3129,8 +3129,21 @@ new_objects:
+       pc.slab = &slab;
+       pc.orig_size = orig_size;
+       freelist = get_partial(s, node, &pc);
+-      if (freelist)
+-              goto check_new_slab;
++      if (freelist) {
++              if (kmem_cache_debug(s)) {
++                      /*
++                       * For debug caches here we had to go through
++                       * alloc_single_from_partial() so just store the
++                       * tracking info and return the object.
++                       */
++                      if (s->flags & SLAB_STORE_USER)
++                              set_track(s, freelist, TRACK_ALLOC, addr);
++
++                      return freelist;
++              }
++
++              goto retry_load_slab;
++      }
+       slub_put_cpu_ptr(s->cpu_slab);
+       slab = new_slab(s, gfpflags, node);
+@@ -3166,20 +3179,6 @@ new_objects:
+       inc_slabs_node(s, slab_nid(slab), slab->objects);
+-check_new_slab:
+-
+-      if (kmem_cache_debug(s)) {
+-              /*
+-               * For debug caches here we had to go through
+-               * alloc_single_from_partial() so just store the tracking info
+-               * and return the object
+-               */
+-              if (s->flags & SLAB_STORE_USER)
+-                      set_track(s, freelist, TRACK_ALLOC, addr);
+-
+-              return freelist;
+-      }
+-
+       if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
+               /*
+                * For !pfmemalloc_match() case we don't load freelist so that