--- /dev/null
+From f2d2f9598ebb0158a3fe17cda0106d7752e654a2 Mon Sep 17 00:00:00 2001
+From: Harry Yoo <harry.yoo@oracle.com>
+Date: Mon, 18 Aug 2025 11:02:05 +0900
+Subject: mm: introduce and use {pgd,p4d}_populate_kernel()
+
+From: Harry Yoo <harry.yoo@oracle.com>
+
+commit f2d2f9598ebb0158a3fe17cda0106d7752e654a2 upstream.
+
+Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
+populating PGD and P4D entries for the kernel address space. These
+helpers ensure proper synchronization of page tables when updating the
+kernel portion of top-level page tables.
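+
+As the hunks below show, a caller that previously populated a kernel
+PGD entry with:
+
+    pgd_populate(&init_mm, pgd, p4d);
+
+now passes the kernel virtual address being mapped, so the helper can
+synchronize other top-level page tables when the architecture requires
+it:
+
+    pgd_populate_kernel(addr, pgd, p4d);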
+
+Until now, the kernel has relied on each architecture to handle
+synchronization of top-level page tables in an ad-hoc manner. For
+example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct
+mapping and vmemmap mapping changes").
+
+However, this approach has proven fragile for the following reasons:
+
+ 1) It is easy to forget to perform the necessary page table
+ synchronization when introducing new changes.
+ For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory
+ savings for compound devmaps") overlooked the need to synchronize
+ page tables for the vmemmap area.
+
+ 2) It is also easy to overlook that the vmemmap and direct mapping areas
+ must not be accessed before explicit page table synchronization.
+ For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated
+ sub-pmd ranges")) caused crashes by accessing the vmemmap area
+ before calling sync_global_pgds().
+
+To address this, as suggested by Dave Hansen, introduce _kernel() variants
+of the page table population helpers, which invoke architecture-specific
+hooks to properly synchronize page tables. These are introduced in a new
+header file, include/linux/pgalloc.h, so they can be called from common
+code.
+
+These helpers reuse the existing infrastructure for vmalloc and ioremap.
+Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
+and the actual synchronization is performed by
+arch_sync_kernel_mappings().
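+
+As an illustrative sketch (not part of this patch; the actual x86-64
+wiring lands in a follow-up change), an architecture opts in roughly
+like this:
+
+    /* asm/pgtable.h: declare which levels require synchronization */
+    #define ARCH_PAGE_TABLE_SYNC_MASK \
+        (PGTBL_PGD_MODIFIED | PGTBL_P4D_MODIFIED)
+
+    /* invoked by p*d_populate_kernel() when the mask matches */
+    void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+    {
+        /* propagate init_mm updates to all top-level page tables */
+        sync_global_pgds(start, end);
+    }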
+
+This change targets only x86_64 for now, so only PGD and P4D level
+helpers are introduced. They are currently no-ops, since no
+architecture sets PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK.
+
+In theory, PUD and PMD level helpers can be added later if needed by other
+architectures. For now, 32-bit architectures (x86-32 and arm) only handle
+PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them unless
+we introduce a PMD level helper.
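+
+Concretely, on a configuration whose mask contains only
+PGTBL_PMD_MODIFIED (e.g. x86-32), the mask test in the new macros is a
+compile-time constant zero, so pgd_populate_kernel(addr, pgd, p4d)
+expands to:
+
+    pgd_populate(&init_mm, pgd, p4d);
+    if (PGTBL_PMD_MODIFIED & PGTBL_PGD_MODIFIED)    /* always 0 */
+        arch_sync_kernel_mappings(addr, addr);      /* compiled out */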
+
+[harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()]
+ Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com
+Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com
+Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
+Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Kiryl Shutsemau <kas@kernel.org>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: bibo mao <maobibo@loongson.cn>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
+Cc: Dennis Zhou <dennis@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: Joao Martins <joao.m.martins@oracle.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Kevin Brodsky <kevin.brodsky@arm.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Thomas Huth <thuth@redhat.com>
+Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ Adjust context ]
+Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++
+ include/linux/pgtable.h | 13 +++++++------
+ mm/kasan/init.c | 12 ++++++------
+ mm/percpu.c | 6 +++---
+ mm/sparse-vmemmap.c | 6 +++---
+ 5 files changed, 48 insertions(+), 18 deletions(-)
+ create mode 100644 include/linux/pgalloc.h
+
+--- /dev/null
++++ b/include/linux/pgalloc.h
+@@ -0,0 +1,29 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_PGALLOC_H
++#define _LINUX_PGALLOC_H
++
++#include <linux/pgtable.h>
++#include <asm/pgalloc.h>
++
++/*
++ * {pgd,p4d}_populate_kernel() are defined as macros to allow
++ * compile-time optimization based on the configured page table levels.
++ * Without this, linking may fail because callers (e.g., KASAN) may rely
++ * on calls to these functions being optimized away when passing symbols
++ * that exist only for certain page table levels.
++ */
++#define pgd_populate_kernel(addr, pgd, p4d) \
++ do { \
++ pgd_populate(&init_mm, pgd, p4d); \
++ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \
++ arch_sync_kernel_mappings(addr, addr); \
++ } while (0)
++
++#define p4d_populate_kernel(addr, p4d, pud) \
++ do { \
++ p4d_populate(&init_mm, p4d, pud); \
++ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \
++ arch_sync_kernel_mappings(addr, addr); \
++ } while (0)
++
++#endif /* _LINUX_PGALLOC_H */
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -1474,8 +1474,8 @@ static inline int pmd_protnone(pmd_t pmd
+
+ /*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+- * needs to be called.
++ * and let generic vmalloc, ioremap and page table update code know when
++ * arch_sync_kernel_mappings() needs to be called.
+ */
+ #ifndef ARCH_PAGE_TABLE_SYNC_MASK
+ #define ARCH_PAGE_TABLE_SYNC_MASK 0
+@@ -1608,10 +1608,11 @@ static inline bool arch_has_pfn_modify_c
+ /*
+ * Page Table Modification bits for pgtbl_mod_mask.
+ *
+- * These are used by the p?d_alloc_track*() set of functions an in the generic
+- * vmalloc/ioremap code to track at which page-table levels entries have been
+- * modified. Based on that the code can better decide when vmalloc and ioremap
+- * mapping changes need to be synchronized to other page-tables in the system.
++ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
++ * functions in the generic vmalloc, ioremap and page table update code
++ * to track at which page-table levels entries have been modified.
++ * Based on that the code can better decide when page table changes need
++ * to be synchronized to other page-tables in the system.
+ */
+ #define __PGTBL_PGD_MODIFIED 0
+ #define __PGTBL_P4D_MODIFIED 1
+--- a/mm/kasan/init.c
++++ b/mm/kasan/init.c
+@@ -13,9 +13,9 @@
+ #include <linux/mm.h>
+ #include <linux/pfn.h>
+ #include <linux/slab.h>
++#include <linux/pgalloc.h>
+
+ #include <asm/page.h>
+-#include <asm/pgalloc.h>
+
+ #include "kasan.h"
+
+@@ -188,7 +188,7 @@ static int __ref zero_p4d_populate(pgd_t
+ pud_t *pud;
+ pmd_t *pmd;
+
+- p4d_populate(&init_mm, p4d,
++ p4d_populate_kernel(addr, p4d,
+ lm_alias(kasan_early_shadow_pud));
+ pud = pud_offset(p4d, addr);
+ pud_populate(&init_mm, pud,
+@@ -207,7 +207,7 @@ static int __ref zero_p4d_populate(pgd_t
+ if (!p)
+ return -ENOMEM;
+ } else {
+- p4d_populate(&init_mm, p4d,
++ p4d_populate_kernel(addr, p4d,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
+ }
+@@ -247,10 +247,10 @@ int __ref kasan_populate_early_shadow(co
+ * puds,pmds, so pgd_populate(), pud_populate()
+ * is noops.
+ */
+- pgd_populate(&init_mm, pgd,
++ pgd_populate_kernel(addr, pgd,
+ lm_alias(kasan_early_shadow_p4d));
+ p4d = p4d_offset(pgd, addr);
+- p4d_populate(&init_mm, p4d,
++ p4d_populate_kernel(addr, p4d,
+ lm_alias(kasan_early_shadow_pud));
+ pud = pud_offset(p4d, addr);
+ pud_populate(&init_mm, pud,
+@@ -269,7 +269,7 @@ int __ref kasan_populate_early_shadow(co
+ if (!p)
+ return -ENOMEM;
+ } else {
+- pgd_populate(&init_mm, pgd,
++ pgd_populate_kernel(addr, pgd,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
+ }
+--- a/mm/percpu.c
++++ b/mm/percpu.c
+@@ -3172,7 +3172,7 @@ out_free:
+ #endif /* BUILD_EMBED_FIRST_CHUNK */
+
+ #ifdef BUILD_PAGE_FIRST_CHUNK
+-#include <asm/pgalloc.h>
++#include <linux/pgalloc.h>
+
+ #ifndef P4D_TABLE_SIZE
+ #define P4D_TABLE_SIZE PAGE_SIZE
+@@ -3202,7 +3202,7 @@ void __init __weak pcpu_populate_pte(uns
+ new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+- pgd_populate(&init_mm, pgd, new);
++ pgd_populate_kernel(addr, pgd, new);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+@@ -3212,7 +3212,7 @@ void __init __weak pcpu_populate_pte(uns
+ new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!new)
+ goto err_alloc;
+- p4d_populate(&init_mm, p4d, new);
++ p4d_populate_kernel(addr, p4d, new);
+ }
+
+ pud = pud_offset(p4d, addr);
+--- a/mm/sparse-vmemmap.c
++++ b/mm/sparse-vmemmap.c
+@@ -27,9 +27,9 @@
+ #include <linux/spinlock.h>
+ #include <linux/vmalloc.h>
+ #include <linux/sched.h>
++#include <linux/pgalloc.h>
+
+ #include <asm/dma.h>
+-#include <asm/pgalloc.h>
+
+ /*
+ * Allocate a block of memory to be used to back the virtual memory map
+@@ -215,7 +215,7 @@ p4d_t * __meminit vmemmap_p4d_populate(p
+ void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+ if (!p)
+ return NULL;
+- p4d_populate(&init_mm, p4d, p);
++ p4d_populate_kernel(addr, p4d, p);
+ }
+ return p4d;
+ }
+@@ -227,7 +227,7 @@ pgd_t * __meminit vmemmap_pgd_populate(u
+ void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+ if (!p)
+ return NULL;
+- pgd_populate(&init_mm, pgd, p);
++ pgd_populate_kernel(addr, pgd, p);
+ }
+ return pgd;
+ }