git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm/sparse: remove sparse buffer pre-allocation mechanism
author: Muchun Song <songmuchun@bytedance.com>
Fri, 10 Apr 2026 09:24:19 +0000 (17:24 +0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Fri, 29 May 2026 04:04:49 +0000 (21:04 -0700)
Commit 9bdac9142407 ("sparsemem: Put mem map for one node together.")
introduced a mechanism to pre-allocate a large memory block to hold all
memmaps for a NUMA node upfront.

However, the original commit message did not clearly state the actual
benefits or the necessity of explicitly pre-allocating a single chunk for
all memmap areas of a given node.

One of the concerns about removing this pre-allocation is that the
subsequent per-section memmap allocations could become scattered around,
and might turn too many memory blocks/sections into an "un-offlinable"
state.  However, tests show that even without the explicit node-wide
pre-allocation, memblock still allocates memory closely and back-to-back.
When tracing vmemmap_set_pmd allocations, the physical chunks allocated by
memblock are strictly adjacent to each other in a single contiguous
physical range (mapped top-down).  Because they are packed tightly
together naturally, they will at most consume or pollute the exact same
number of memory blocks as the explicit pre-allocation did.

Another concern is the boot performance impact of calling memmap_alloc()
multiple times compared to one large node-wide allocation.  Tests on a
256GB VM showed that memmap allocation time increased from 199,555 ns to
741,292 ns.  Even though this is about 3.7x slower, on a 1TB machine the
entire memmap allocation would still take only a few milliseconds, so the
boot performance difference is negligible.

Since no negative impact on memory offlining behavior or noticeable boot
performance regression was found, this patch proposes removing the
explicit node-wide memmap pre-allocation mechanism to reduce the
maintenance burden.

Link: https://lore.kernel.org/20260410092419.2446420-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Liam Howlett <liam@infradead.org>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h
mm/sparse-vmemmap.c
mm/sparse.c

index e3b6112a8d79aaf4d87d7e2e64f97e0b0cc2b29c..8a0078a4dc7847f627dcb077be56d0047c8d3d3a 100644 (file)
@@ -4855,7 +4855,6 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
 }
 #endif
 
-void *sparse_buffer_alloc(unsigned long size);
 unsigned long section_map_size(void);
 struct page * __populate_section_memmap(unsigned long pfn,
                unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
index 3c35d2303a6197962241e0f275f60e8daa44d29d..43f82621dd9295ba3fc0c31f5a6269deb1bdd862 100644 (file)
@@ -87,15 +87,10 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
 void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
                                         struct vmem_altmap *altmap)
 {
-       void *ptr;
-
        if (altmap)
                return altmap_alloc_block_buf(size, altmap);
 
-       ptr = sparse_buffer_alloc(size);
-       if (!ptr)
-               ptr = vmemmap_alloc_block(size, node);
-       return ptr;
+       return vmemmap_alloc_block(size, node);
 }
 
 static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
index e13f9f5fa0909244c34d6c686245d16b09e61419..16ac6df3c89fa26b3a459583d1be8d71b2078329 100644 (file)
@@ -239,12 +239,9 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
                struct dev_pagemap *pgmap)
 {
        unsigned long size = section_map_size();
-       struct page *map = sparse_buffer_alloc(size);
+       struct page *map;
        phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 
-       if (map)
-               return map;
-
        map = memmap_alloc(size, size, addr, nid, false);
        if (!map)
                panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
@@ -254,55 +251,6 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
-static void *sparsemap_buf __meminitdata;
-static void *sparsemap_buf_end __meminitdata;
-
-static inline void __meminit sparse_buffer_free(unsigned long size)
-{
-       WARN_ON(!sparsemap_buf || size == 0);
-       memblock_free(sparsemap_buf, size);
-}
-
-static void __init sparse_buffer_init(unsigned long size, int nid)
-{
-       phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
-       WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
-       /*
-        * Pre-allocated buffer is mainly used by __populate_section_memmap
-        * and we want it to be properly aligned to the section size - this is
-        * especially the case for VMEMMAP which maps memmap to PMDs
-        */
-       sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
-       sparsemap_buf_end = sparsemap_buf + size;
-}
-
-static void __init sparse_buffer_fini(void)
-{
-       unsigned long size = sparsemap_buf_end - sparsemap_buf;
-
-       if (sparsemap_buf && size > 0)
-               sparse_buffer_free(size);
-       sparsemap_buf = NULL;
-}
-
-void * __meminit sparse_buffer_alloc(unsigned long size)
-{
-       void *ptr = NULL;
-
-       if (sparsemap_buf) {
-               ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
-               if (ptr + size > sparsemap_buf_end)
-                       ptr = NULL;
-               else {
-                       /* Free redundant aligned space */
-                       if ((unsigned long)(ptr - sparsemap_buf) > 0)
-                               sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
-                       sparsemap_buf = ptr + size;
-               }
-       }
-       return ptr;
-}
-
 void __weak __meminit vmemmap_populate_print_last(void)
 {
 }
@@ -360,8 +308,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
                goto failed;
        }
 
-       sparse_buffer_init(map_count * section_map_size(), nid);
-
        sparse_vmemmap_init_nid_early(nid);
 
        for_each_present_section_nr(pnum_begin, pnum) {
@@ -379,7 +325,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
                                       __func__, nid);
                                pnum_begin = pnum;
                                sparse_usage_fini();
-                               sparse_buffer_fini();
                                goto failed;
                        }
                        memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
@@ -388,7 +333,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
                }
        }
        sparse_usage_fini();
-       sparse_buffer_fini();
        return;
 failed:
        /*