slab: remove the do_slab_free() fastpath
author      Vlastimil Babka <vbabka@suse.cz>
            Fri, 23 Jan 2026 06:52:51 +0000 (07:52 +0100)
committer   Vlastimil Babka <vbabka@suse.cz>
            Thu, 29 Jan 2026 08:29:26 +0000 (09:29 +0100)
We have removed cpu slab usage from allocation paths. Now remove
do_slab_free(), which was freeing objects to the cpu slab when
the object belonged to it. Instead call __slab_free() directly,
which was previously the fallback.

This simplifies kfree_nolock() - when freeing to the percpu sheaf
fails, we can call defer_free() directly.

Also remove functions that became unused.
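
For illustration only, here is a minimal userspace sketch of the free-path
shape after this change. The struct layouts and the kfree_nolock_tail()
helper are simplified stand-ins, not the real SLUB internals:

  #include <stdio.h>

  struct kmem_cache { const char *name; };
  struct slab { int unused; };

  /* Stand-in for the slow path, which now handles every slab_free() call. */
  static void __slab_free(struct kmem_cache *s, struct slab *slab,
                          void *head, void *tail, int cnt, unsigned long addr)
  {
          (void)slab; (void)head; (void)tail; (void)addr;
          printf("%s: %d object(s) freed via __slab_free()\n", s->name, cnt);
  }

  /* Stand-in for the deferred free list now used by kfree_nolock() directly. */
  static void defer_free(struct kmem_cache *s, void *object)
  {
          (void)object;
          printf("%s: object queued for deferred freeing\n", s->name);
  }

  /* After this patch slab_free() goes straight to __slab_free(). */
  static void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
                        unsigned long addr)
  {
          __slab_free(s, slab, object, object, 1, addr);
  }

  /*
   * kfree_nolock() tail, reached when freeing to the percpu sheaf failed:
   * no more cnt == 0 trip through do_slab_free(), the object is deferred.
   */
  static void kfree_nolock_tail(struct kmem_cache *s, void *object)
  {
          defer_free(s, object);
  }

  int main(void)
  {
          struct kmem_cache cache = { .name = "demo-cache" };
          struct slab slab = { 0 };
          int obj;

          slab_free(&cache, &slab, &obj, 0);
          kfree_nolock_tail(&cache, &obj);
          return 0;
  }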

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
mm/slub.c

index c254b4b66d1b585ae97291d0cc5ebeec7ddd214a..4346ace499bb80b8ae0e05981410f6f7e17beded 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3678,29 +3678,6 @@ static inline unsigned int init_tid(int cpu)
        return cpu;
 }
 
-static inline void note_cmpxchg_failure(const char *n,
-               const struct kmem_cache *s, unsigned long tid)
-{
-#ifdef SLUB_DEBUG_CMPXCHG
-       unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
-
-       pr_info("%s %s: cmpxchg redo ", n, s->name);
-
-       if (IS_ENABLED(CONFIG_PREEMPTION) &&
-           tid_to_cpu(tid) != tid_to_cpu(actual_tid)) {
-               pr_warn("due to cpu change %d -> %d\n",
-                       tid_to_cpu(tid), tid_to_cpu(actual_tid));
-       } else if (tid_to_event(tid) != tid_to_event(actual_tid)) {
-               pr_warn("due to cpu running other code. Event %ld->%ld\n",
-                       tid_to_event(tid), tid_to_event(actual_tid));
-       } else {
-               pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
-                       actual_tid, tid, next_tid(tid));
-       }
-#endif
-       stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
-}
-
 static void init_kmem_cache_cpus(struct kmem_cache *s)
 {
 #ifdef CONFIG_PREEMPT_RT
@@ -4239,18 +4216,6 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
        return true;
 }
 
-static inline bool
-__update_cpu_freelist_fast(struct kmem_cache *s,
-                          void *freelist_old, void *freelist_new,
-                          unsigned long tid)
-{
-       struct freelist_tid old = { .freelist = freelist_old, .tid = tid };
-       struct freelist_tid new = { .freelist = freelist_new, .tid = next_tid(tid) };
-
-       return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid,
-                                            &old.freelist_tid, new.freelist_tid);
-}
-
 /*
  * Get the slab's freelist and do not freeze it.
  *
@@ -6188,99 +6153,6 @@ void defer_free_barrier(void)
                irq_work_sync(&per_cpu_ptr(&defer_free_objects, cpu)->work);
 }
 
-/*
- * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
- * can perform fastpath freeing without additional function calls.
- *
- * The fastpath is only possible if we are freeing to the current cpu slab
- * of this processor. This typically the case if we have just allocated
- * the item before.
- *
- * If fastpath is not possible then fall back to __slab_free where we deal
- * with all sorts of special processing.
- *
- * Bulk free of a freelist with several objects (all pointing to the
- * same slab) possible by specifying head and tail ptr, plus objects
- * count (cnt). Bulk free indicated by tail pointer being set.
- */
-static __always_inline void do_slab_free(struct kmem_cache *s,
-                               struct slab *slab, void *head, void *tail,
-                               int cnt, unsigned long addr)
-{
-       /* cnt == 0 signals that it's called from kfree_nolock() */
-       bool allow_spin = cnt;
-       struct kmem_cache_cpu *c;
-       unsigned long tid;
-       void **freelist;
-
-redo:
-       /*
-        * Determine the currently cpus per cpu slab.
-        * The cpu may change afterward. However that does not matter since
-        * data is retrieved via this pointer. If we are on the same cpu
-        * during the cmpxchg then the free will succeed.
-        */
-       c = raw_cpu_ptr(s->cpu_slab);
-       tid = READ_ONCE(c->tid);
-
-       /* Same with comment on barrier() in __slab_alloc_node() */
-       barrier();
-
-       if (unlikely(slab != c->slab)) {
-               if (unlikely(!allow_spin)) {
-                       /*
-                        * __slab_free() can locklessly cmpxchg16 into a slab,
-                        * but then it might need to take spin_lock
-                        * for further processing.
-                        * Avoid the complexity and simply add to a deferred list.
-                        */
-                       defer_free(s, head);
-               } else {
-                       __slab_free(s, slab, head, tail, cnt, addr);
-               }
-               return;
-       }
-
-       if (unlikely(!allow_spin)) {
-               if ((in_nmi() || !USE_LOCKLESS_FAST_PATH()) &&
-                   local_lock_is_locked(&s->cpu_slab->lock)) {
-                       defer_free(s, head);
-                       return;
-               }
-               cnt = 1; /* restore cnt. kfree_nolock() frees one object at a time */
-       }
-
-       if (USE_LOCKLESS_FAST_PATH()) {
-               freelist = READ_ONCE(c->freelist);
-
-               set_freepointer(s, tail, freelist);
-
-               if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
-                       note_cmpxchg_failure("slab_free", s, tid);
-                       goto redo;
-               }
-       } else {
-               __maybe_unused unsigned long flags = 0;
-
-               /* Update the free list under the local lock */
-               local_lock_cpu_slab(s, flags);
-               c = this_cpu_ptr(s->cpu_slab);
-               if (unlikely(slab != c->slab)) {
-                       local_unlock_cpu_slab(s, flags);
-                       goto redo;
-               }
-               tid = c->tid;
-               freelist = c->freelist;
-
-               set_freepointer(s, tail, freelist);
-               c->freelist = head;
-               c->tid = next_tid(tid);
-
-               local_unlock_cpu_slab(s, flags);
-       }
-       stat_add(s, FREE_FASTPATH, cnt);
-}
-
 static __fastpath_inline
 void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
               unsigned long addr)
@@ -6297,7 +6169,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
                        return;
        }
 
-       do_slab_free(s, slab, object, object, 1, addr);
+       __slab_free(s, slab, object, object, 1, addr);
 }
 
 #ifdef CONFIG_MEMCG
@@ -6306,7 +6178,7 @@ static noinline
 void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
 {
        if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
-               do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
+               __slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
 }
 #endif
 
@@ -6321,7 +6193,7 @@ void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
         * to remove objects, whose reuse must be delayed.
         */
        if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
-               do_slab_free(s, slab, head, tail, cnt, addr);
+               __slab_free(s, slab, head, tail, cnt, addr);
 }
 
 #ifdef CONFIG_SLUB_RCU_DEBUG
@@ -6347,14 +6219,14 @@ static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
 
        /* resume freeing */
        if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
-               do_slab_free(s, slab, object, object, 1, _THIS_IP_);
+               __slab_free(s, slab, object, object, 1, _THIS_IP_);
 }
 #endif /* CONFIG_SLUB_RCU_DEBUG */
 
 #ifdef CONFIG_KASAN_GENERIC
 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
 {
-       do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
+       __slab_free(cache, virt_to_slab(x), x, x, 1, addr);
 }
 #endif
 
@@ -6570,7 +6442,12 @@ void kfree_nolock(const void *object)
                        return;
        }
 
-       do_slab_free(s, slab, x, x, 0, _RET_IP_);
+       /*
+        * __slab_free() can locklessly cmpxchg16 into a slab, but then it might
+        * need to take spin_lock for further processing.
+        * Avoid the complexity and simply add to a deferred list.
+        */
+       defer_free(s, x);
 }
 EXPORT_SYMBOL_GPL(kfree_nolock);
 
@@ -6996,7 +6873,7 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
                if (kfence_free(df.freelist))
                        continue;
 
-               do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
+               __slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
                             _RET_IP_);
        } while (likely(size));
 }
@@ -7082,7 +6959,7 @@ __refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
                                cnt++;
                                object = get_freepointer(s, object);
                        } while (object);
-                       do_slab_free(s, slab, head, tail, cnt, _RET_IP_);
+                       __slab_free(s, slab, head, tail, cnt, _RET_IP_);
                }
 
                if (refilled >= max)