From 9fac145b6d3fe570277438f8d860eabf229dc545 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)"
Date: Sun, 11 Jan 2026 10:21:01 +0200
Subject: [PATCH] mm, arch: consolidate hugetlb CMA reservation

Every architecture that supports the hugetlb_cma command line parameter
reserves CMA areas for hugetlb during setup_arch().  This obscures the
ordering of hugetlb CMA initialization with respect to the rest of the
core MM initialization.

Introduce an arch_hugetlb_cma_order() callback to let architectures
report the desired order-per-bit of CMA areas, and provide a weak
implementation of arch_hugetlb_cma_order() for architectures that don't
support hugetlb with CMA.

Use this callback in hugetlb_cma_reserve() instead of passing the order
as a parameter, and call hugetlb_cma_reserve() from mm_core_init_early()
rather than having it spread over architecture-specific code.

Link: https://lkml.kernel.org/r/20260111082105.290734-28-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft)
Cc: Alexander Gordeev
Cc: Alex Shi
Cc: Andreas Larsson
Cc: "Borislav Petkov (AMD)"
Cc: Catalin Marinas
Cc: David Hildenbrand
Cc: David S. Miller
Cc: Dinh Nguyen
Cc: Geert Uytterhoeven
Cc: Guo Ren
Cc: Heiko Carstens
Cc: Helge Deller
Cc: Huacai Chen
Cc: Ingo Molnar
Cc: Johannes Berg
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Klara Modin
Cc: Liam Howlett
Cc: Lorenzo Stoakes
Cc: Magnus Lindholm
Cc: Matt Turner
Cc: Max Filippov
Cc: Michael Ellerman
Cc: Michal Hocko
Cc: Michal Simek
Cc: Muchun Song
Cc: Oscar Salvador
Cc: Palmer Dabbelt
Cc: Pratyush Yadav
Cc: Richard Weinberger
Cc: "Ritesh Harjani (IBM)"
Cc: Russell King
Cc: Stafford Horne
Cc: Suren Baghdasaryan
Cc: Thomas Bogendoerfer
Cc: Thomas Gleixner
Cc: Vasily Gorbik
Cc: Vineet Gupta
Cc: Vlastimil Babka
Cc: Will Deacon
Signed-off-by: Andrew Morton
---
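[ The consolidation hinges on link-time weak-symbol override: mm/hugetlb_cma.c
  carries a __weak arch_hugetlb_cma_order() that returns 0, and any
  architecture object file that defines a strong arch_hugetlb_cma_order()
  replaces it at link time, with no registration code needed.  Below is a
  minimal, self-contained userspace sketch of that mechanism, for
  illustration only; main() and the printf reporting are hypothetical and
  not part of the patch.

	#include <stdio.h>

	/*
	 * Weak default, as in mm/hugetlb_cma.c after this patch: used
	 * unless another object file provides a strong definition.
	 * Returning 0 means "no gigantic-page CMA order on this arch".
	 */
	__attribute__((weak)) unsigned int arch_hugetlb_cma_order(void)
	{
		return 0;
	}

	int main(void)
	{
		unsigned int order = arch_hugetlb_cma_order();

		/* Mirrors the early return in hugetlb_cma_reserve(). */
		if (!order)
			printf("order 0: skip hugetlb CMA reservation\n");
		else
			printf("reserve CMA areas of order %u\n", order);
		return 0;
	}

  Linking in an object with a strong arch_hugetlb_cma_order(), e.g. one
  returning PUD_SHIFT - PAGE_SHIFT as the per-arch implementations in this
  patch do, silently overrides the weak default. ]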
 .../driver-api/cxl/linux/early-boot.rst |  2 +-
 arch/arm64/include/asm/hugetlb.h        |  2 --
 arch/arm64/mm/hugetlbpage.c             | 10 +++-------
 arch/arm64/mm/init.c                    |  9 ---------
 arch/powerpc/include/asm/hugetlb.h      |  5 -----
 arch/powerpc/kernel/setup-common.c      |  1 -
 arch/powerpc/mm/hugetlbpage.c           | 11 ++++-------
 arch/riscv/mm/hugetlbpage.c             |  8 ++++++++
 arch/riscv/mm/init.c                    |  2 --
 arch/s390/kernel/setup.c                |  2 --
 arch/s390/mm/hugetlbpage.c              |  8 ++++++++
 arch/x86/kernel/setup.c                 |  4 ----
 arch/x86/mm/hugetlbpage.c               |  8 ++++++++
 include/linux/hugetlb.h                 |  6 ++++--
 mm/hugetlb_cma.c                        | 19 ++++++++++++++-----
 mm/mm_init.c                            |  1 +
 16 files changed, 51 insertions(+), 47 deletions(-)

diff --git a/Documentation/driver-api/cxl/linux/early-boot.rst b/Documentation/driver-api/cxl/linux/early-boot.rst
index a7fc6fc85fbef..414481f33819d 100644
--- a/Documentation/driver-api/cxl/linux/early-boot.rst
+++ b/Documentation/driver-api/cxl/linux/early-boot.rst
@@ -125,7 +125,7 @@ The contiguous memory allocator (CMA) enables reservation of contiguous memory
 regions on NUMA nodes during early boot. However, CMA cannot reserve memory
 on NUMA nodes that are not online during early boot. ::
 
-  void __init hugetlb_cma_reserve(int order) {
+  void __init hugetlb_cma_reserve(void) {
       if (!node_online(nid))
          /* do not allow reservations */
   }
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 44c1f757bfcf8..e6f8ff3cc6306 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -56,8 +56,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 #define __HAVE_ARCH_HUGE_PTEP_GET
 extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 
-void __init arm64_hugetlb_cma_reserve(void);
-
 #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
 extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 1d90a7e753336..f8dd58ab67a82 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -36,16 +36,12 @@
  * huge pages could still be served from those areas.
  */
 #ifdef CONFIG_CMA
-void __init arm64_hugetlb_cma_reserve(void)
+unsigned int arch_hugetlb_cma_order(void)
 {
-	int order;
-
 	if (pud_sect_supported())
-		order = PUD_SHIFT - PAGE_SHIFT;
-	else
-		order = CONT_PMD_SHIFT - PAGE_SHIFT;
+		return PUD_SHIFT - PAGE_SHIFT;
 
-	hugetlb_cma_reserve(order);
+	return CONT_PMD_SHIFT - PAGE_SHIFT;
 }
 #endif /* CONFIG_CMA */
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9d271aff76526..96711b8578fd0 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -311,15 +311,6 @@ void __init bootmem_init(void)
 
 	arch_numa_init();
 
-	/*
-	 * must be done after arch_numa_init() which calls numa_init() to
-	 * initialize node_online_map that gets used in hugetlb_cma_reserve()
-	 * while allocating required CMA size across online nodes.
-	 */
-#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
-	arm64_hugetlb_cma_reserve();
-#endif
-
 	kvm_hyp_reserve();
 
 	dma_limits_init();
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 86326587e58de..6d32a42994458 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -68,7 +68,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 			       unsigned long addr, pte_t *ptep,
 			       pte_t pte, int dirty);
 
-void gigantic_hugetlb_cma_reserve(void) __init;
 #include <asm-generic/hugetlb.h>
 
 #else /* ! CONFIG_HUGETLB_PAGE */
@@ -77,10 +76,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 {
 }
 
-static inline void __init gigantic_hugetlb_cma_reserve(void)
-{
-}
-
 static inline void __init hugetlbpage_init_defaultsize(void)
 {
 }
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index c8c42b419742f..cb5b73adc2506 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -1003,7 +1003,6 @@ void __init setup_arch(char **cmdline_p)
 	fadump_cma_init();
 	kdump_cma_reserve();
 	kvm_cma_reserve();
-	gigantic_hugetlb_cma_reserve();
 
 	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index d3c1b749dcfc8..558fafb82b8ac 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -200,18 +200,15 @@ static int __init hugetlbpage_init(void)
 
 arch_initcall(hugetlbpage_init);
 
-void __init gigantic_hugetlb_cma_reserve(void)
+unsigned int __init arch_hugetlb_cma_order(void)
 {
-	unsigned long order = 0;
-
 	if (radix_enabled())
-		order = PUD_SHIFT - PAGE_SHIFT;
+		return PUD_SHIFT - PAGE_SHIFT;
 	else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
 		/*
 		 * For pseries we do use ibm,expected#pages for reserving 16G pages.
 		 */
-		order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
+		return mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
 
-	if (order)
-		hugetlb_cma_reserve(order);
+	return 0;
 }
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 375dd96bb4a0d..a6d217112cf46 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -447,3 +447,11 @@ static __init int gigantic_pages_init(void)
 }
 arch_initcall(gigantic_pages_init);
 #endif
+
+unsigned int __init arch_hugetlb_cma_order(void)
+{
+	if (IS_ENABLED(CONFIG_64BIT))
+		return PUD_SHIFT - PAGE_SHIFT;
+
+	return 0;
+}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 11ac4041afc0e..848efeb9e1636 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -311,8 +311,6 @@ static void __init setup_bootmem(void)
 	memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
 	dma_contiguous_reserve(dma32_phys_limit);
-	if (IS_ENABLED(CONFIG_64BIT))
-		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 }
 
 #ifdef CONFIG_RELOCATABLE
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c1fe0b53c5ac5..b60284328fe3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -963,8 +963,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_uv();
 	dma_contiguous_reserve(ident_map_size);
 	vmcp_cma_reserve();
-	if (cpu_has_edat2())
-		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 
 	reserve_crashkernel();
 #ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index d42e61c7594ea..d93417d1e53c3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -255,3 +255,11 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
 	else
 		return false;
 }
+
+unsigned int __init arch_hugetlb_cma_order(void)
+{
+	if (cpu_has_edat2())
+		return PUD_SHIFT - PAGE_SHIFT;
+
+	return 0;
+}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e2318fa9b1bbd..e1efe3975aa08 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1189,10 +1189,6 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init();
 	dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
 
-	if (boot_cpu_has(X86_FEATURE_GBPAGES)) {
-		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
-	}
-
 	/*
 	 * Reserve memory for crash kernel after SRAT is parsed so that it
 	 * won't consume hotpluggable memory.
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 58f7f2bd535d5..3b26621c91286 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -42,3 +42,11 @@ static __init int gigantic_pages_init(void)
 arch_initcall(gigantic_pages_init);
 #endif
 #endif
+
+unsigned int __init arch_hugetlb_cma_order(void)
+{
+	if (boot_cpu_has(X86_FEATURE_GBPAGES))
+		return PUD_SHIFT - PAGE_SHIFT;
+
+	return 0;
+}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 694f6e83c637b..00e6a73e7bba9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -281,6 +281,8 @@ void fixup_hugetlb_reservations(struct vm_area_struct *vma);
 void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 int hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 
+unsigned int arch_hugetlb_cma_order(void);
+
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
@@ -1322,9 +1324,9 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h,
 }
 
 #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
-extern void __init hugetlb_cma_reserve(int order);
+extern void __init hugetlb_cma_reserve(void);
 #else
-static inline __init void hugetlb_cma_reserve(int order)
+static inline __init void hugetlb_cma_reserve(void)
 {
 }
 #endif
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e8e4dc7182d54..b1eb5998282c5 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -134,12 +134,24 @@ static int __init cmdline_parse_hugetlb_cma_only(char *p)
 
 early_param("hugetlb_cma_only", cmdline_parse_hugetlb_cma_only);
 
-void __init hugetlb_cma_reserve(int order)
+unsigned int __weak arch_hugetlb_cma_order(void)
 {
-	unsigned long size, reserved, per_node;
+	return 0;
+}
+
+void __init hugetlb_cma_reserve(void)
+{
+	unsigned long size, reserved, per_node, order;
 	bool node_specific_cma_alloc = false;
 	int nid;
 
+	if (!hugetlb_cma_size)
+		return;
+
+	order = arch_hugetlb_cma_order();
+	if (!order)
+		return;
+
 	/*
 	 * HugeTLB CMA reservation is required for gigantic
 	 * huge pages which could not be allocated via the
@@ -149,9 +161,6 @@ void __init hugetlb_cma_reserve(int order)
 	VM_WARN_ON(order <= MAX_PAGE_ORDER);
 	cma_reserve_called = true;
 
-	if (!hugetlb_cma_size)
-		return;
-
 	hugetlb_bootmem_set_nodes();
 
 	for (nid = 0; nid < MAX_NUMNODES; nid++) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 31246fe5c3619..0cfbdef91d721 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2677,6 +2677,7 @@ void __init __weak mem_init(void)
 
 void __init mm_core_init_early(void)
 {
+	hugetlb_cma_reserve();
 	hugetlb_bootmem_alloc();
 
 	free_area_init();
-- 
2.47.3