x86/mm: Fix _pgd_alloc() for Xen PV mode
author    Juergen Gross <jgross@suse.com>
          Tue, 22 Apr 2025 13:17:17 +0000 (15:17 +0200)
committer Dave Hansen <dave.hansen@linux.intel.com>
          Wed, 23 Apr 2025 14:49:14 +0000 (07:49 -0700)
Recently _pgd_alloc() was switched from using __get_free_pages() to
pagetable_alloc_noprof(), which may return a compound page when the
allocation order is larger than 0.

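For context: the generic page table allocator requests compound pages,
while the old __get_free_pages() path never did. A simplified sketch of
pagetable_alloc_noprof() as found in include/linux/mm.h (from memory,
details may differ between kernel versions):

/*
 * Simplified sketch: the __GFP_COMP below is what makes any order > 0
 * allocation come back as a compound page.
 */
static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
{
	struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);

	return page_ptdesc(page);
}
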
On x86 this will be the case if CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
is set, even if PTI has been disabled at runtime.

When running as a Xen PV guest (which always disables PTI), using a
compound page for a PGD will trigger VM_BUG_ON_PGFLAGS when the Xen
code tries to pin the PGD.

Fix the Xen issue, and at the same time avoid the unneeded 8k allocation
for a PGD when PTI is disabled, by replacing PGD_ALLOCATION_ORDER with an
inline helper that returns the required order for PGD allocations.

Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}")
Reported-by: Petr Vaněk <arkamar@atlas.cz>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Petr Vaněk <arkamar@atlas.cz>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250422131717.25724-1-jgross%40suse.com
arch/x86/include/asm/pgalloc.h
arch/x86/kernel/machine_kexec_32.c
arch/x86/mm/pgtable.c
arch/x86/platform/efi/efi_64.c

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a33147520044b41c6c7b8e07cbd8b68317129ae5..c88691b15f3c678ff9f0323a5725ebc305cfb1f9 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,8 @@
 #include <linux/mm.h>          /* for struct page */
 #include <linux/pagemap.h>
 
+#include <asm/cpufeature.h>
+
 #define __HAVE_ARCH_PTE_ALLOC_ONE
 #define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
@@ -29,16 +31,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
 static inline void paravirt_release_p4d(unsigned long pfn) {}
 #endif
 
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
 /*
- * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
- * both 8k in size and 8k-aligned.  That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
+ * In case of Page Table Isolation active, we acquire two PGDs instead of one.
+ * Being order-1, it is both 8k in size and 8k-aligned.  That lets us just
+ * flip bit 12 in a pointer to swap between the two 4k halves.
  */
-#define PGD_ALLOCATION_ORDER 1
-#else
-#define PGD_ALLOCATION_ORDER 0
-#endif
+static inline unsigned int pgd_allocation_order(void)
+{
+       if (cpu_feature_enabled(X86_FEATURE_PTI))
+               return 1;
+       return 0;
+}
 
 /*
  * Allocate and free page tables.
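
As an aside, the "flip bit 12" trick the comment above describes: with an
order-1, 8k-aligned allocation the kernel PGD and the user (PTI) PGD occupy
the two 4k halves, so switching between them is a single bit operation. A
minimal sketch of the idea; the real helpers are kernel_to_user_pgdp() and
user_to_kernel_pgdp() in arch/x86/include/asm/pgtable.h:

/* Bit 12 (PAGE_SHIFT) selects which 4k half of the 8k PGD pair we point at. */
static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((unsigned long)pgdp | (1UL << PAGE_SHIFT));
}

static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((unsigned long)pgdp & ~(1UL << PAGE_SHIFT));
}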
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 80265162aefff9bd7c588f0d097adfe354de681f..1f325304c4a8424f38a380cb592aafa75a243c4d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -42,7 +42,7 @@ static void load_segments(void)
 
 static void machine_kexec_free_page_tables(struct kimage *image)
 {
-       free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
+       free_pages((unsigned long)image->arch.pgd, pgd_allocation_order());
        image->arch.pgd = NULL;
 #ifdef CONFIG_X86_PAE
        free_page((unsigned long)image->arch.pmd0);
@@ -59,7 +59,7 @@ static void machine_kexec_free_page_tables(struct kimage *image)
 static int machine_kexec_alloc_page_tables(struct kimage *image)
 {
        image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                                   PGD_ALLOCATION_ORDER);
+                                                   pgd_allocation_order());
 #ifdef CONFIG_X86_PAE
        image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
        image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a05fcddfc811afa23333656e69a03cfb406fac94..f7ae44d3dd9e0136066dea955f2718968a616463 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -360,7 +360,7 @@ static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
         * We allocate one page for pgd.
         */
        if (!SHARED_KERNEL_PMD)
-               return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
+               return __pgd_alloc(mm, pgd_allocation_order());
 
        /*
         * Now PAE kernel is not running as a Xen domain. We can allocate
@@ -380,7 +380,7 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
 {
-       return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
+       return __pgd_alloc(mm, pgd_allocation_order());
 }
 
 static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac57259a432b8c67f76590ee3050438311084a92..a4b4ebd41b8fab43ef8ae318a50f943083f2af8b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -73,7 +73,7 @@ int __init efi_alloc_page_tables(void)
        gfp_t gfp_mask;
 
        gfp_mask = GFP_KERNEL | __GFP_ZERO;
-       efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+       efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order());
        if (!efi_pgd)
                goto fail;
 
@@ -96,7 +96,7 @@ free_p4d:
        if (pgtable_l5_enabled())
                free_page((unsigned long)pgd_page_vaddr(*pgd));
 free_pgd:
-       free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
+       free_pages((unsigned long)efi_pgd, pgd_allocation_order());
 fail:
        return -ENOMEM;
 }