From: Vlastimil Babka (SUSE)
Date: Wed, 11 Mar 2026 08:25:56 +0000 (+0100)
Subject: slab: create barns for online memoryless nodes
X-Git-Tag: v7.1-rc1~165^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7f693882f00963fd7808e333b86a87e0f9b9873b;p=thirdparty%2Fkernel%2Fstable.git

slab: create barns for online memoryless nodes

Ming Lei has reported [1] a performance regression due to replacing cpu
(partial) slabs with sheaves. With slub stats enabled, a large number of
slowpath allocations was observed. The affected system has 8 online NUMA
nodes, but only 2 of them have memory.

For sheaves to work effectively on a given cpu, its NUMA node has to have
a struct node_barn allocated. Those are currently allocated only on nodes
with memory (N_MEMORY), where kmem_cache_node also exists, as the goal is
to cache only node-local objects. But in order to have good performance
on a memoryless node, we need its barn to exist and use sheaves to cache
non-local objects (as no local objects can exist anyway).

Therefore change the implementation to allocate barns on all online
nodes, tracked in a new nodemask slab_barn_nodes. Also add a cpu hotplug
callback, as that is when a memoryless node can become online. Change
both get_barn() and the rcu_sheaf->node assignment to use numa_node_id()
so that a sheaf is returned to the barn of the local cpu's (potentially
memoryless) node, and no longer to the nearest node with memory.

On systems with CONFIG_HAVE_MEMORYLESS_NODES=y (which are not the main
target of this change) barns did not exist on memoryless nodes, but
get_barn() using numa_mem_id() meant a barn was returned from the nearest
node with memory. This works, but the barn lock contention increases with
every such memoryless node. With this change, a barn will also be
allocated on each memoryless node, reducing this contention in exchange
for increased memory consumption.

Reported-by: Ming Lei
Link: https://lore.kernel.org/all/aZ0SbIqaIkwoW2mB@fedora/ [1]
Link: https://patch.msgid.link/20260311-b4-slab-memoryless-barns-v1-2-70ab850be4ce@kernel.org
Signed-off-by: Vlastimil Babka (SUSE)
Reviewed-by: Harry Yoo
Reviewed-by: Hao Li
---

diff --git a/mm/slub.c b/mm/slub.c
index 6f65cc1361084..73fe34014c6ea 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -453,7 +453,7 @@ static inline struct node_barn *get_barn_node(struct kmem_cache *s, int node)
  */
 static inline struct node_barn *get_barn(struct kmem_cache *s)
 {
-	return get_barn_node(s, numa_mem_id());
+	return get_barn_node(s, numa_node_id());
 }
 
 /*
@@ -472,6 +472,12 @@ static inline struct node_barn *get_barn(struct kmem_cache *s)
  */
 static nodemask_t slab_nodes;
 
+/*
+ * Similar to slab_nodes but for where we have node_barn allocated.
+ * Corresponds to N_ONLINE nodes.
+ */
+static nodemask_t slab_barn_nodes;
+
 /*
  * Workqueue used for flushing cpu and kfree_rcu sheaves.
 */
@@ -4062,6 +4068,51 @@ void flush_all_rcu_sheaves(void)
 	rcu_barrier();
 }
 
+static int slub_cpu_setup(unsigned int cpu)
+{
+	int nid = cpu_to_node(cpu);
+	struct kmem_cache *s;
+	int ret = 0;
+
+	/*
+	 * we never clear a nid so it's safe to do a quick check before taking
+	 * the mutex, and then recheck to handle parallel cpu hotplug safely
+	 */
+	if (node_isset(nid, slab_barn_nodes))
+		return 0;
+
+	mutex_lock(&slab_mutex);
+
+	if (node_isset(nid, slab_barn_nodes))
+		goto out;
+
+	list_for_each_entry(s, &slab_caches, list) {
+		struct node_barn *barn;
+
+		/*
+		 * barn might already exist if a previous callback failed midway
+		 */
+		if (!cache_has_sheaves(s) || get_barn_node(s, nid))
+			continue;
+
+		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
+
+		if (!barn) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		barn_init(barn);
+		s->per_node[nid].barn = barn;
+	}
+	node_set(nid, slab_barn_nodes);
+
+out:
+	mutex_unlock(&slab_mutex);
+
+	return ret;
+}
+
 /*
  * Use the cpu notifier to insure that the cpu slabs are flushed when
  * necessary.
@@ -5916,7 +5967,7 @@ do_free:
 		rcu_sheaf = NULL;
 	} else {
 		pcs->rcu_free = NULL;
-		rcu_sheaf->node = numa_mem_id();
+		rcu_sheaf->node = numa_node_id();
 	}
 
 	/*
@@ -7577,7 +7628,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 	if (slab_state == DOWN || !cache_has_sheaves(s))
 		return 1;
 
-	for_each_node_mask(node, slab_nodes) {
+	for_each_node_mask(node, slab_barn_nodes) {
 		struct node_barn *barn;
 
 		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
@@ -8230,6 +8281,7 @@ static int slab_mem_going_online_callback(int nid)
 	 * and barn initialized for the new node.
 	 */
 	node_set(nid, slab_nodes);
+	node_set(nid, slab_barn_nodes);
 out:
 	mutex_unlock(&slab_mutex);
 	return ret;
@@ -8308,7 +8360,7 @@ static void __init bootstrap_cache_sheaves(struct kmem_cache *s)
 	if (!capacity)
 		return;
 
-	for_each_node_mask(node, slab_nodes) {
+	for_each_node_mask(node, slab_barn_nodes) {
 		struct node_barn *barn;
 
 		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
@@ -8380,6 +8432,9 @@ void __init kmem_cache_init(void)
 	for_each_node_state(node, N_MEMORY)
 		node_set(node, slab_nodes);
 
+	for_each_online_node(node)
+		node_set(node, slab_barn_nodes);
+
 	create_boot_cache(kmem_cache_node, "kmem_cache_node",
 			sizeof(struct kmem_cache_node),
 			SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
@@ -8406,7 +8461,7 @@ void __init kmem_cache_init(void)
 	/* Setup random freelists for each cache */
 	init_freelist_randomization();
 
-	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", slub_cpu_setup,
 				  slub_cpu_dead);
 
 	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
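
For readers without the full mm/slub.c context: get_barn_node() is used
above but its body is not part of this diff. A minimal sketch follows,
assuming it is a plain per-node array lookup consistent with the
s->per_node[nid].barn assignment in slub_cpu_setup(); the actual
definition in mm/slub.c may differ.

	static inline struct node_barn *get_barn_node(struct kmem_cache *s,
						      int node)
	{
		/* assumed layout: one barn pointer per node, per cache */
		return s->per_node[node].barn;
	}

Under that assumption, once barns exist for every node in
slab_barn_nodes, get_barn(s) == get_barn_node(s, numa_node_id())
succeeds on a memoryless node as well, instead of contending on the
barn of the nearest node with memory via numa_mem_id().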