slab: remove struct kmem_cache_cpu
author    Vlastimil Babka <vbabka@suse.cz>
          Fri, 23 Jan 2026 06:52:54 +0000 (07:52 +0100)
committer Vlastimil Babka <vbabka@suse.cz>
          Thu, 29 Jan 2026 08:29:27 +0000 (09:29 +0100)
The cpu slab is no longer used for allocation or freeing; the
remaining code only handles flushing, and even that is effectively
dead.  Remove the whole struct kmem_cache_cpu, the flushing code and
other orphaned functions.

The only field of kmem_cache_cpu still used is the stat array, present
with CONFIG_SLUB_STATS. Move it instead to a new struct
kmem_cache_stats. In struct kmem_cache, the corresponding field is
cpu_stats, placed near the end of the struct.

Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
mm/slab.h
mm/slub.c

index 31375198e19c24f9f40523c8be6afa20cba4ef9d..e251a2714b0c58580fea157f492d8457418b8ba2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
 # define system_has_freelist_aba()     system_has_cmpxchg128()
 # define try_cmpxchg_freelist          try_cmpxchg128
 # endif
-#define this_cpu_try_cmpxchg_freelist  this_cpu_try_cmpxchg128
 typedef u128 freelist_full_t;
 #else /* CONFIG_64BIT */
 # ifdef system_has_cmpxchg64
 # define system_has_freelist_aba()     system_has_cmpxchg64()
 # define try_cmpxchg_freelist          try_cmpxchg64
 # endif
-#define this_cpu_try_cmpxchg_freelist  this_cpu_try_cmpxchg64
 typedef u64 freelist_full_t;
 #endif /* CONFIG_64BIT */
 
@@ -189,7 +187,6 @@ struct kmem_cache_order_objects {
  * Slab cache management.
  */
 struct kmem_cache {
-       struct kmem_cache_cpu __percpu *cpu_slab;
        struct slub_percpu_sheaves __percpu *cpu_sheaves;
        /* Used for retrieving partial slabs, etc. */
        slab_flags_t flags;
@@ -238,6 +235,10 @@ struct kmem_cache {
        unsigned int usersize;          /* Usercopy region size */
 #endif
 
+#ifdef CONFIG_SLUB_STATS
+       struct kmem_cache_stats __percpu *cpu_stats;
+#endif
+
        struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
index 0ab1423ee0fad3e86f9b1da00c8ae3bc2881e6ea..8e4482e75a5cc6d569f58219c93d21cb98c3669e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -405,28 +405,11 @@ enum stat_item {
        NR_SLUB_STAT_ITEMS
 };
 
-struct freelist_tid {
-       union {
-               struct {
-                       void *freelist;         /* Pointer to next available object */
-                       unsigned long tid;      /* Globally unique transaction id */
-               };
-               freelist_full_t freelist_tid;
-       };
-};
-
-/*
- * When changing the layout, make sure freelist and tid are still compatible
- * with this_cpu_cmpxchg_double() alignment requirements.
- */
-struct kmem_cache_cpu {
-       struct freelist_tid;
-       struct slab *slab;      /* The slab from which we are allocating */
-       local_trylock_t lock;   /* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
+struct kmem_cache_stats {
        unsigned int stat[NR_SLUB_STAT_ITEMS];
-#endif
 };
+#endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
@@ -435,7 +418,7 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
         * The rmw is racy on a preemptible kernel but this is acceptable, so
         * avoid this_cpu_add()'s irq-disable overhead.
         */
-       raw_cpu_inc(s->cpu_slab->stat[si]);
+       raw_cpu_inc(s->cpu_stats->stat[si]);
 #endif
 }
 
@@ -443,7 +426,7 @@ static inline
 void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
 {
 #ifdef CONFIG_SLUB_STATS
-       raw_cpu_add(s->cpu_slab->stat[si], v);
+       raw_cpu_add(s->cpu_stats->stat[si], v);
 #endif
 }
 
@@ -532,7 +515,7 @@ static inline struct node_barn *get_barn(struct kmem_cache *s)
 static nodemask_t slab_nodes;
 
 /*
- * Workqueue used for flush_cpu_slab().
+ * Workqueue used for flushing cpu and kfree_rcu sheaves.
  */
 static struct workqueue_struct *flushwq;
 
@@ -1154,20 +1137,6 @@ static void object_err(struct kmem_cache *s, struct slab *slab,
        WARN_ON(1);
 }
 
-static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
-                              void **freelist, void *nextfree)
-{
-       if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
-           !check_valid_pointer(s, slab, nextfree) && freelist) {
-               object_err(s, slab, *freelist, "Freechain corrupt");
-               *freelist = NULL;
-               slab_fix(s, "Isolate corrupted freechain");
-               return true;
-       }
-
-       return false;
-}
-
 static void __slab_err(struct slab *slab)
 {
        if (slab_in_kunit_test())
@@ -1949,11 +1918,6 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
-static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
-                              void **freelist, void *nextfree)
-{
-       return false;
-}
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
@@ -3651,191 +3615,6 @@ static void *get_from_partial(struct kmem_cache *s, int node,
        return get_from_any_partial(s, pc);
 }
 
-#ifdef CONFIG_PREEMPTION
-/*
- * Calculate the next globally unique transaction for disambiguation
- * during cmpxchg. The transactions start with the cpu number and are then
- * incremented by CONFIG_NR_CPUS.
- */
-#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
-#else
-/*
- * No preemption supported therefore also no need to check for
- * different cpus.
- */
-#define TID_STEP 1
-#endif /* CONFIG_PREEMPTION */
-
-static inline unsigned long next_tid(unsigned long tid)
-{
-       return tid + TID_STEP;
-}
-
-#ifdef SLUB_DEBUG_CMPXCHG
-static inline unsigned int tid_to_cpu(unsigned long tid)
-{
-       return tid % TID_STEP;
-}
-
-static inline unsigned long tid_to_event(unsigned long tid)
-{
-       return tid / TID_STEP;
-}
-#endif
-
-static inline unsigned int init_tid(int cpu)
-{
-       return cpu;
-}
-
-static void init_kmem_cache_cpus(struct kmem_cache *s)
-{
-       int cpu;
-       struct kmem_cache_cpu *c;
-
-       for_each_possible_cpu(cpu) {
-               c = per_cpu_ptr(s->cpu_slab, cpu);
-               local_trylock_init(&c->lock);
-               c->tid = init_tid(cpu);
-       }
-}
-
-/*
- * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
- * unfreezes the slabs and puts it on the proper list.
- * Assumes the slab has been already safely taken away from kmem_cache_cpu
- * by the caller.
- */
-static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
-                           void *freelist)
-{
-       struct kmem_cache_node *n = get_node(s, slab_nid(slab));
-       int free_delta = 0;
-       void *nextfree, *freelist_iter, *freelist_tail;
-       int tail = DEACTIVATE_TO_HEAD;
-       unsigned long flags = 0;
-       struct freelist_counters old, new;
-
-       if (READ_ONCE(slab->freelist)) {
-               stat(s, DEACTIVATE_REMOTE_FREES);
-               tail = DEACTIVATE_TO_TAIL;
-       }
-
-       /*
-        * Stage one: Count the objects on cpu's freelist as free_delta and
-        * remember the last object in freelist_tail for later splicing.
-        */
-       freelist_tail = NULL;
-       freelist_iter = freelist;
-       while (freelist_iter) {
-               nextfree = get_freepointer(s, freelist_iter);
-
-               /*
-                * If 'nextfree' is invalid, it is possible that the object at
-                * 'freelist_iter' is already corrupted.  So isolate all objects
-                * starting at 'freelist_iter' by skipping them.
-                */
-               if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
-                       break;
-
-               freelist_tail = freelist_iter;
-               free_delta++;
-
-               freelist_iter = nextfree;
-       }
-
-       /*
-        * Stage two: Unfreeze the slab while splicing the per-cpu
-        * freelist to the head of slab's freelist.
-        */
-       do {
-               old.freelist = READ_ONCE(slab->freelist);
-               old.counters = READ_ONCE(slab->counters);
-               VM_BUG_ON(!old.frozen);
-
-               /* Determine target state of the slab */
-               new.counters = old.counters;
-               new.frozen = 0;
-               if (freelist_tail) {
-                       new.inuse -= free_delta;
-                       set_freepointer(s, freelist_tail, old.freelist);
-                       new.freelist = freelist;
-               } else {
-                       new.freelist = old.freelist;
-               }
-       } while (!slab_update_freelist(s, slab, &old, &new, "unfreezing slab"));
-
-       /*
-        * Stage three: Manipulate the slab list based on the updated state.
-        */
-       if (!new.inuse && n->nr_partial >= s->min_partial) {
-               stat(s, DEACTIVATE_EMPTY);
-               discard_slab(s, slab);
-               stat(s, FREE_SLAB);
-       } else if (new.freelist) {
-               spin_lock_irqsave(&n->list_lock, flags);
-               add_partial(n, slab, tail);
-               spin_unlock_irqrestore(&n->list_lock, flags);
-               stat(s, tail);
-       } else {
-               stat(s, DEACTIVATE_FULL);
-       }
-}
-
-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
-{
-       unsigned long flags;
-       struct slab *slab;
-       void *freelist;
-
-       local_lock_irqsave(&s->cpu_slab->lock, flags);
-
-       slab = c->slab;
-       freelist = c->freelist;
-
-       c->slab = NULL;
-       c->freelist = NULL;
-       c->tid = next_tid(c->tid);
-
-       local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-
-       if (slab) {
-               deactivate_slab(s, slab, freelist);
-               stat(s, CPUSLAB_FLUSH);
-       }
-}
-
-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
-{
-       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
-       void *freelist = c->freelist;
-       struct slab *slab = c->slab;
-
-       c->slab = NULL;
-       c->freelist = NULL;
-       c->tid = next_tid(c->tid);
-
-       if (slab) {
-               deactivate_slab(s, slab, freelist);
-               stat(s, CPUSLAB_FLUSH);
-       }
-}
-
-static inline void flush_this_cpu_slab(struct kmem_cache *s)
-{
-       struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-
-       if (c->slab)
-               flush_slab(s, c);
-}
-
-static bool has_cpu_slab(int cpu, struct kmem_cache *s)
-{
-       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
-
-       return c->slab;
-}
-
 static bool has_pcs_used(int cpu, struct kmem_cache *s)
 {
        struct slub_percpu_sheaves *pcs;
@@ -3849,11 +3628,11 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
 }
 
 /*
- * Flush cpu slab.
+ * Flush percpu sheaves
  *
  * Called from CPU work handler with migration disabled.
  */
-static void flush_cpu_slab(struct work_struct *w)
+static void flush_cpu_sheaves(struct work_struct *w)
 {
        struct kmem_cache *s;
        struct slub_flush_work *sfw;
@@ -3864,8 +3643,6 @@ static void flush_cpu_slab(struct work_struct *w)
 
        if (cache_has_sheaves(s))
                pcs_flush_all(s);
-
-       flush_this_cpu_slab(s);
 }
 
 static void flush_all_cpus_locked(struct kmem_cache *s)
@@ -3878,11 +3655,11 @@ static void flush_all_cpus_locked(struct kmem_cache *s)
 
        for_each_online_cpu(cpu) {
                sfw = &per_cpu(slub_flush, cpu);
-               if (!has_cpu_slab(cpu, s) && !has_pcs_used(cpu, s)) {
+               if (!has_pcs_used(cpu, s)) {
                        sfw->skip = true;
                        continue;
                }
-               INIT_WORK(&sfw->work, flush_cpu_slab);
+               INIT_WORK(&sfw->work, flush_cpu_sheaves);
                sfw->skip = false;
                sfw->s = s;
                queue_work_on(cpu, flushwq, &sfw->work);
@@ -3988,7 +3765,6 @@ static int slub_cpu_dead(unsigned int cpu)
 
        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
-               __flush_cpu_slab(s, cpu);
                if (cache_has_sheaves(s))
                        __pcs_flush_all_cpu(s, cpu);
        }
@@ -7162,26 +6938,21 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
                barn_init(barn);
 }
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
+#ifdef CONFIG_SLUB_STATS
+static inline int alloc_kmem_cache_stats(struct kmem_cache *s)
 {
        BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
                        NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
-                       sizeof(struct kmem_cache_cpu));
+                       sizeof(struct kmem_cache_stats));
 
-       /*
-        * Must align to double word boundary for the double cmpxchg
-        * instructions to work; see __pcpu_double_call_return_bool().
-        */
-       s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
-                                    2 * sizeof(void *));
+       s->cpu_stats = alloc_percpu(struct kmem_cache_stats);
 
-       if (!s->cpu_slab)
+       if (!s->cpu_stats)
                return 0;
 
-       init_kmem_cache_cpus(s);
-
        return 1;
 }
+#endif
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
@@ -7292,7 +7063,9 @@ void __kmem_cache_release(struct kmem_cache *s)
 {
        cache_random_seq_destroy(s);
        pcs_destroy(s);
-       free_percpu(s->cpu_slab);
+#ifdef CONFIG_SLUB_STATS
+       free_percpu(s->cpu_stats);
+#endif
        free_kmem_cache_nodes(s);
 }
 
@@ -7989,12 +7762,6 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 
        memcpy(s, static_cache, kmem_cache->object_size);
 
-       /*
-        * This runs very early, and only the boot processor is supposed to be
-        * up.  Even if it weren't true, IRQs are not up so we couldn't fire
-        * IPIs around.
-        */
-       __flush_cpu_slab(s, smp_processor_id());
        for_each_kmem_cache_node(s, node, n) {
                struct slab *p;
 
@@ -8209,8 +7976,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
        if (!init_kmem_cache_nodes(s))
                goto out;
 
-       if (!alloc_kmem_cache_cpus(s))
+#ifdef CONFIG_SLUB_STATS
+       if (!alloc_kmem_cache_stats(s))
                goto out;
+#endif
 
        err = init_percpu_sheaves(s);
        if (err)
@@ -8529,33 +8298,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        if (!nodes)
                return -ENOMEM;
 
-       if (flags & SO_CPU) {
-               int cpu;
-
-               for_each_possible_cpu(cpu) {
-                       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
-                                                              cpu);
-                       int node;
-                       struct slab *slab;
-
-                       slab = READ_ONCE(c->slab);
-                       if (!slab)
-                               continue;
-
-                       node = slab_nid(slab);
-                       if (flags & SO_TOTAL)
-                               x = slab->objects;
-                       else if (flags & SO_OBJECTS)
-                               x = slab->inuse;
-                       else
-                               x = 1;
-
-                       total += x;
-                       nodes[node] += x;
-
-               }
-       }
-
        /*
         * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
         * already held which will conflict with an existing lock order:
@@ -8926,7 +8668,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
                return -ENOMEM;
 
        for_each_online_cpu(cpu) {
-               unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
+               unsigned int x = per_cpu_ptr(s->cpu_stats, cpu)->stat[si];
 
                data[cpu] = x;
                sum += x;
@@ -8952,7 +8694,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
        int cpu;
 
        for_each_online_cpu(cpu)
-               per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
+               per_cpu_ptr(s->cpu_stats, cpu)->stat[si] = 0;
 }
 
 #define STAT_ATTR(si, text)                                    \