From: Mike Rapoport (Microsoft) Date: Wed, 11 Feb 2026 10:31:41 +0000 (+0200) Subject: mm: cache struct page for empty_zero_page and return it from ZERO_PAGE() X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=26513781d1b3a1e8b4b576ed62751d604a69b374;p=thirdparty%2Flinux.git mm: cache struct page for empty_zero_page and return it from ZERO_PAGE() For most architectures, every invocation of ZERO_PAGE() does virt_to_page(empty_zero_page). But empty_zero_page is in BSS and it is enough to get its struct page once at initialization time and then use it whenever a zero page should be accessed. Add yet another __zero_page variable that will be initialized as virt_to_page(empty_zero_page) for most architectures in a weak arch_setup_zero_pages() function. For architectures that use colored zero pages (MIPS and s390), rename their setup_zero_pages() to arch_setup_zero_pages() and make it global rather than static. For architectures that cannot use virt_to_page() for BSS (arm64 and sparc64), add an override of arch_setup_zero_pages(). Link: https://lkml.kernel.org/r/20260211103141.3215197-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Catalin Marinas Acked-by: David Hildenbrand (Arm) Acked-by: Liam R. Howlett Cc: Andreas Larsson Cc: "Borislav Petkov (AMD)" Cc: Christophe Leroy (CS GROUP) Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Lorenzo Stoakes Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russell King Cc: Stafford Horne Cc: Suren Baghdasaryan Cc: Vineet Gupta Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 769570e43c185..aa4b13da6371a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -106,12 +106,6 @@ static inline void arch_leave_lazy_mmu_mode(void) #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \ local_flush_tlb_page_nonotify(vma, address) -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 96711b8578fd0..417ec7efe5692 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -328,6 +328,11 @@ void __init bootmem_init(void) memblock_dump_all(); } +void __init arch_setup_zero_pages(void) +{ + __zero_page = phys_to_page(__pa_symbol(empty_zero_page)); +} + void __init arch_mm_preinit(void) { unsigned int flags = SWIOTLB_VERBOSE; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4f6449ad02cae..55b25e85122a3 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -56,10 +56,7 @@ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); -/* - * Not static inline because used by IP27 special magic initialization code - */ -static void __init setup_zero_pages(void) +void __init 
arch_setup_zero_pages(void) { unsigned int order; @@ -450,7 +447,6 @@ void __init arch_mm_preinit(void) BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); maar_init(); - setup_zero_pages(); /* Setup zeroed pages. */ highmem_init(); #ifdef CONFIG_64BIT @@ -461,11 +457,6 @@ void __init arch_mm_preinit(void) 0x80000000 - 4, KCORE_TEXT); #endif } -#else /* CONFIG_NUMA */ -void __init arch_mm_preinit(void) -{ - setup_zero_pages(); /* This comes from node 0 */ -} #endif /* !CONFIG_NUMA */ void free_init_pages(const char *what, unsigned long begin, unsigned long end) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 3c20475cbee22..1f72efc2a579f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -69,7 +69,7 @@ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); -static void __init setup_zero_pages(void) +void __init arch_setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; @@ -159,8 +159,6 @@ void __init arch_mm_preinit(void) cpumask_set_cpu(0, mm_cpumask(&init_mm)); pv_init(); - - setup_zero_pages(); /* Setup zeroed pages. */ } unsigned long memory_block_size_bytes(void) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 615f460c50af4..74ede706fb325 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -210,9 +210,6 @@ extern unsigned long _PAGE_CACHE; extern unsigned long pg_iobits; extern unsigned long _PAGE_ALL_SZ_BITS; -extern struct page *mem_map_zero; -#define ZERO_PAGE(vaddr) (mem_map_zero) - /* PFNs are real physical page numbers. However, mem_map only begins to record * per-page information starting at pfn_base. 
This is to handle systems where * the first physical page in the machine is at some huge physical address, diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 748790998ff50..3aa47f2b6c6e8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -177,9 +177,6 @@ extern unsigned long sparc_ramdisk_image64; extern unsigned int sparc_ramdisk_image; extern unsigned int sparc_ramdisk_size; -struct page *mem_map_zero __read_mostly; -EXPORT_SYMBOL(mem_map_zero); - unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly; unsigned long sparc64_kern_pri_context __read_mostly; @@ -2490,11 +2487,17 @@ static void __init register_page_bootmem_info(void) register_page_bootmem_info_node(NODE_DATA(i)); #endif } -void __init mem_init(void) + +void __init arch_setup_zero_pages(void) { phys_addr_t zero_page_pa = kern_base + ((unsigned long)&empty_zero_page[0] - KERNBASE); + __zero_page = phys_to_page(zero_page_pa); +} + +void __init mem_init(void) +{ /* * Must be done after boot memory is put on freelist, because here we * might set fields in deferred struct pages that have not yet been @@ -2503,12 +2506,6 @@ void __init mem_init(void) */ register_page_bootmem_info(); - /* - * Set up the zero page, mark it reserved, so that page count - * is not manipulated when freeing the page from user ptes. - */ - mem_map_zero = pfn_to_page(PHYS_PFN(zero_page_pa)); - if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_ecache_flush_init(); } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a05c3885f85b..776993d4567b4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1929,6 +1929,8 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot) * For architectures that don't __HAVE_COLOR_ZERO_PAGE the zero page lives in * empty_zero_page in BSS. 
*/ +void arch_setup_zero_pages(void); + #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) { @@ -1956,10 +1958,13 @@ static inline unsigned long zero_pfn(unsigned long addr) } extern uint8_t empty_zero_page[PAGE_SIZE]; +extern struct page *__zero_page; -#ifndef ZERO_PAGE -#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) -#endif +static inline struct page *_zero_page(unsigned long addr) +{ + return __zero_page; +} +#define ZERO_PAGE(vaddr) _zero_page(vaddr) #endif /* __HAVE_COLOR_ZERO_PAGE */ diff --git a/mm/mm_init.c b/mm/mm_init.c index a0472d496c917..f903747ca854d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -59,7 +59,10 @@ EXPORT_SYMBOL(zero_page_pfn); #ifndef __HAVE_COLOR_ZERO_PAGE uint8_t empty_zero_page[PAGE_SIZE] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -#endif + +struct page *__zero_page __ro_after_init; +EXPORT_SYMBOL(__zero_page); +#endif /* __HAVE_COLOR_ZERO_PAGE */ #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; @@ -2680,12 +2683,21 @@ static void __init mem_init_print_info(void) ); } -static int __init init_zero_page_pfn(void) +#ifndef __HAVE_COLOR_ZERO_PAGE +/* + * architectures that __HAVE_COLOR_ZERO_PAGE must define this function + */ +void __init __weak arch_setup_zero_pages(void) +{ + __zero_page = virt_to_page(empty_zero_page); +} +#endif + +static void __init init_zero_page_pfn(void) { + arch_setup_zero_pages(); zero_page_pfn = page_to_pfn(ZERO_PAGE(0)); - return 0; } -early_initcall(init_zero_page_pfn); void __init __weak arch_mm_preinit(void) { @@ -2709,6 +2721,7 @@ void __init mm_core_init_early(void) void __init mm_core_init(void) { arch_mm_preinit(); + init_zero_page_pfn(); /* Initializations relying on SMP setup */ BUILD_BUG_ON(MAX_ZONELISTS > 2);