slab: simplify kmalloc_nolock()

author Vlastimil Babka <vbabka@suse.cz>

Fri, 23 Jan 2026 06:52:53 +0000 (07:52 +0100)

committer Vlastimil Babka <vbabka@suse.cz>

Thu, 29 Jan 2026 08:29:27 +0000 (09:29 +0100)
author Vlastimil Babka <vbabka@suse.cz>
Fri, 23 Jan 2026 06:52:53 +0000 (07:52 +0100)
committer Vlastimil Babka <vbabka@suse.cz>
Thu, 29 Jan 2026 08:29:27 +0000 (09:29 +0100)
diff --git a/mm/slab.h b/mm/slab.h

index f88568e31268e4a3c2d87d7275fd25ede1ef5480..31375198e19c24f9f40523c8be6afa20cba4ef9d 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -190,7 +190,6 @@ struct kmem_cache_order_objects {
   */
  struct kmem_cache {
         struct kmem_cache_cpu __percpu *cpu_slab;
-       struct lock_class_key lock_key;
         struct slub_percpu_sheaves __percpu *cpu_sheaves;
         /* Used for retrieving partial slabs, etc. */
         slab_flags_t flags;
diff --git a/mm/slub.c b/mm/slub.c

index 883effab47fd395c09ddeb052db9dbe038209567..0ab1423ee0fad3e86f9b1da00c8ae3bc2881e6ea 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3690,29 +3690,12 @@ static inline unsigned int init_tid(int cpu)
  
  static void init_kmem_cache_cpus(struct kmem_cache *s)
  {
-#ifdef CONFIG_PREEMPT_RT
-       /*
-        * Register lockdep key for non-boot kmem caches to avoid
-        * WARN_ON_ONCE(static_obj(key))) in lockdep_register_key()
-        */
-       bool finegrain_lockdep = !init_section_contains(s, 1);
-#else
-       /*
-        * Don't bother with different lockdep classes for each
-        * kmem_cache, since we only use local_trylock_irqsave().
-        */
-       bool finegrain_lockdep = false;
-#endif
         int cpu;
         struct kmem_cache_cpu *c;
  
-       if (finegrain_lockdep)
-               lockdep_register_key(&s->lock_key);
         for_each_possible_cpu(cpu) {
                 c = per_cpu_ptr(s->cpu_slab, cpu);
                 local_trylock_init(&c->lock);
-               if (finegrain_lockdep)
-                       lockdep_set_class(&c->lock, &s->lock_key);
                 c->tid = init_tid(cpu);
         }
  }
@@ -3799,47 +3782,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
         }
  }
  
-/*
- * ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
- * can be acquired without a deadlock before invoking the function.
- *
- * Without LOCKDEP we trust the code to be correct. kmalloc_nolock() is
- * using local_lock_is_locked() properly before calling local_lock_cpu_slab(),
- * and kmalloc() is not used in an unsupported context.
- *
- * With LOCKDEP, on PREEMPT_RT lockdep does its checking in local_lock_irqsave().
- * On !PREEMPT_RT we use trylock to avoid false positives in NMI, but
- * lockdep_assert() will catch a bug in case:
- * #1
- * kmalloc() -> ___slab_alloc() -> irqsave -> NMI -> bpf -> kmalloc_nolock()
- * or
- * #2
- * kmalloc() -> ___slab_alloc() -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock()
- *
- * On PREEMPT_RT an invocation is not possible from IRQ-off or preempt
- * disabled context. The lock will always be acquired and if needed it
- * block and sleep until the lock is available.
- * #1 is possible in !PREEMPT_RT only.
- * #2 is possible in both with a twist that irqsave is replaced with rt_spinlock:
- * kmalloc() -> ___slab_alloc() -> rt_spin_lock(kmem_cache_A) ->
- *    tracepoint/kprobe -> bpf -> kmalloc_nolock() -> rt_spin_lock(kmem_cache_B)
- *
- * local_lock_is_locked() prevents the case kmem_cache_A == kmem_cache_B
- */
-#if defined(CONFIG_PREEMPT_RT) || !defined(CONFIG_LOCKDEP)
-#define local_lock_cpu_slab(s, flags)  \
-       local_lock_irqsave(&(s)->cpu_slab->lock, flags)
-#else
-#define local_lock_cpu_slab(s, flags)                                         \
-       do {                                                                   \
-               bool __l = local_trylock_irqsave(&(s)->cpu_slab->lock, flags); \
-               lockdep_assert(__l);                                           \
-       } while (0)
-#endif
-
-#define local_unlock_cpu_slab(s, flags)        \
-       local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
-
  static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  {
         unsigned long flags;
@@ -4405,20 +4347,6 @@ success:
         return object;
  }
  
-/*
- * We disallow kprobes in ___slab_alloc() to prevent reentrance
- *
- * kmalloc() -> ___slab_alloc() -> local_lock_cpu_slab() protected part of
- * ___slab_alloc() manipulating c->freelist -> kprobe -> bpf ->
- * kmalloc_nolock() or kfree_nolock() -> __update_cpu_freelist_fast()
- * manipulating c->freelist without lock.
- *
- * This does not prevent kprobe in functions called from ___slab_alloc() such as
- * local_lock_irqsave() itself, and that is fine, we only need to protect the
- * c->freelist manipulation in ___slab_alloc() itself.
- */
-NOKPROBE_SYMBOL(___slab_alloc);
-
  static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
                 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
  {
@@ -5259,13 +5187,13 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
         if (unlikely(!size))
                 return ZERO_SIZE_PTR;
  
-       if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible())
-               /*
-                * kmalloc_nolock() in PREEMPT_RT is not supported from
-                * non-preemptible context because local_lock becomes a
-                * sleeping lock on RT.
-                */
+       /*
+        * See the comment for the same check in
+        * alloc_frozen_pages_nolock_noprof()
+        */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
                 return NULL;
+
  retry:
         if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
                 return NULL;
@@ -5274,10 +5202,11 @@ retry:
         if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
                 /*
                  * kmalloc_nolock() is not supported on architectures that
-                * don't implement cmpxchg16b, but debug caches don't use
-                * per-cpu slab and per-cpu partial slabs. They rely on
-                * kmem_cache_node->list_lock, so kmalloc_nolock() can
-                * attempt to allocate from debug caches by
+                * don't implement cmpxchg16b and thus need slab_lock()
+                * which could be preempted by a nmi.
+                * But debug caches don't use that and only rely on
+                * kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
+                * to allocate from debug caches by
                  * spin_trylock_irqsave(&n->list_lock, ...)
                  */
                 return NULL;
@@ -5286,42 +5215,31 @@ retry:
         if (ret)
                 goto success;
  
-       ret = ERR_PTR(-EBUSY);
-
         /*
          * Do not call slab_alloc_node(), since trylock mode isn't
          * compatible with slab_pre_alloc_hook/should_failslab and
          * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
          * and slab_post_alloc_hook() directly.
-        *
-        * In !PREEMPT_RT ___slab_alloc() manipulates (freelist,tid) pair
-        * in irq saved region. It assumes that the same cpu will not
-        * __update_cpu_freelist_fast() into the same (freelist,tid) pair.
-        * Therefore use in_nmi() to check whether particular bucket is in
-        * irq protected section.
-        *
-        * If in_nmi() && local_lock_is_locked(s->cpu_slab) then it means that
-        * this cpu was interrupted somewhere inside ___slab_alloc() after
-        * it did local_lock_irqsave(&s->cpu_slab->lock, flags).
-        * In this case fast path with __update_cpu_freelist_fast() is not safe.
          */
-       if (!in_nmi() || !local_lock_is_locked(&s->cpu_slab->lock))
-               ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
+       ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
  
-       if (PTR_ERR(ret) == -EBUSY) {
-               if (can_retry) {
-                       /* pick the next kmalloc bucket */
-                       size = s->object_size + 1;
-                       /*
-                        * Another alternative is to
-                        * if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
-                        * else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
-                        * to retry from bucket of the same size.
-                        */
-                       can_retry = false;
-                       goto retry;
-               }
-               ret = NULL;
+       /*
+        * It's possible we failed due to trylock as we preempted someone with
+        * the sheaves locked, and the list_lock is also held by another cpu.
+        * But it should be rare that multiple kmalloc buckets would have
+        * sheaves locked, so try a larger one.
+        */
+       if (!ret && can_retry) {
+               /* pick the next kmalloc bucket */
+               size = s->object_size + 1;
+               /*
+                * Another alternative is to
+                * if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
+                * else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
+                * to retry from bucket of the same size.
+                */
+               can_retry = false;
+               goto retry;
         }
  
  success:
@@ -7374,10 +7292,6 @@ void __kmem_cache_release(struct kmem_cache *s)
  {
         cache_random_seq_destroy(s);
         pcs_destroy(s);
-#ifdef CONFIG_PREEMPT_RT
-       if (s->cpu_slab)
-               lockdep_unregister_key(&s->lock_key);
-#endif
         free_percpu(s->cpu_slab);
         free_kmem_cache_nodes(s);
  }
author	Vlastimil Babka <vbabka@suse.cz>
	Fri, 23 Jan 2026 06:52:53 +0000 (07:52 +0100)
committer	Vlastimil Babka <vbabka@suse.cz>
	Thu, 29 Jan 2026 08:29:27 +0000 (09:29 +0100)
mm/slab.h		patch \| blob \| blame \| history
mm/slub.c		patch \| blob \| blame \| history