]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
Revert "x86/kexec: Allocate PGD for x86_64 transition page tables separately"
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 May 2025 14:06:48 +0000 (16:06 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 9 May 2025 07:41:44 +0000 (09:41 +0200)
This reverts commit 6821918f451942aa79759f29677a22f2d4ff4cbe which is
commit 4b5bc2ec9a239bce261ffeafdd63571134102323 upstream.

The patch it relies on is not in the 6.1.y tree, and has been reported
to cause problems, so let's revert it for now.

Reported-by: Eric Hagberg <ehagberg@janestreet.com>
Link: https://lore.kernel.org/r/CAAH4uRBxJ_XvYjCpgYXHqrKSNj6x9pA7X6NBPNTekeQ90DQSJA@mail.gmail.com
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/include/asm/kexec.h
arch/x86/kernel/machine_kexec_64.c

index e2e1ec99c9998f2f37d3af38090d02ca1a6b5363..256eee99afc8f9f75a1c7e6f707c1a5fb2cbe1b6 100644 (file)
@@ -16,7 +16,6 @@
 # define PAGES_NR              4
 #endif
 
-# define KEXEC_CONTROL_PAGE_SIZE       4096
 # define KEXEC_CONTROL_CODE_MAX_SIZE   2048
 
 #ifndef __ASSEMBLY__
@@ -45,6 +44,7 @@ struct kimage;
 /* Maximum address we can use for the control code buffer */
 # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
 
+# define KEXEC_CONTROL_PAGE_SIZE       4096
 
 /* The native architecture */
 # define KEXEC_ARCH KEXEC_ARCH_386
@@ -59,6 +59,9 @@ struct kimage;
 /* Maximum address we can use for the control pages */
 # define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)
 
+/* Allocate one page for the pdp and the second for the code */
+# define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)
+
 /* The native architecture */
 # define KEXEC_ARCH KEXEC_ARCH_X86_64
 #endif
@@ -143,19 +146,6 @@ struct kimage_arch {
 };
 #else
 struct kimage_arch {
-       /*
-        * This is a kimage control page, as it must not overlap with either
-        * source or destination address ranges.
-        */
-       pgd_t *pgd;
-       /*
-        * The virtual mapping of the control code page itself is used only
-        * during the transition, while the current kernel's pages are all
-        * in place. Thus the intermediate page table pages used to map it
-        * are not control pages, but instead just normal pages obtained
-        * with get_zeroed_page(). And have to be tracked (below) so that
-        * they can be freed.
-        */
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
index 5d61a342871b5ef7ad3ee6eab42100b02940b7b1..24b6eaacc81eb199d6ac2cae1e5134d767314973 100644 (file)
@@ -149,8 +149,7 @@ static void free_transition_pgtable(struct kimage *image)
        image->arch.pte = NULL;
 }
 
-static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
-                                  unsigned long control_page)
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 {
        pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
        unsigned long vaddr, paddr;
@@ -161,7 +160,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
        pte_t *pte;
 
        vaddr = (unsigned long)relocate_kernel;
-       paddr = control_page;
+       paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
        pgd += pgd_index(vaddr);
        if (!pgd_present(*pgd)) {
                p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
@@ -220,7 +219,7 @@ static void *alloc_pgt_page(void *data)
        return p;
 }
 
-static int init_pgtable(struct kimage *image, unsigned long control_page)
+static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 {
        struct x86_mapping_info info = {
                .alloc_pgt_page = alloc_pgt_page,
@@ -229,12 +228,12 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
                .kernpg_flag    = _KERNPG_TABLE_NOENC,
        };
        unsigned long mstart, mend;
+       pgd_t *level4p;
        int result;
        int i;
 
-       image->arch.pgd = alloc_pgt_page(image);
-       if (!image->arch.pgd)
-               return -ENOMEM;
+       level4p = (pgd_t *)__va(start_pgtable);
+       clear_page(level4p);
 
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
                info.page_flag   |= _PAGE_ENC;
@@ -248,8 +247,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
                mstart = pfn_mapped[i].start << PAGE_SHIFT;
                mend   = pfn_mapped[i].end << PAGE_SHIFT;
 
-               result = kernel_ident_mapping_init(&info, image->arch.pgd,
-                                                  mstart, mend);
+               result = kernel_ident_mapping_init(&info,
+                                                level4p, mstart, mend);
                if (result)
                        return result;
        }
@@ -264,8 +263,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
                mstart = image->segment[i].mem;
                mend   = mstart + image->segment[i].memsz;
 
-               result = kernel_ident_mapping_init(&info, image->arch.pgd,
-                                                  mstart, mend);
+               result = kernel_ident_mapping_init(&info,
+                                                level4p, mstart, mend);
 
                if (result)
                        return result;
@@ -275,19 +274,15 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
         * Prepare EFI systab and ACPI tables for kexec kernel since they are
         * not covered by pfn_mapped.
         */
-       result = map_efi_systab(&info, image->arch.pgd);
+       result = map_efi_systab(&info, level4p);
        if (result)
                return result;
 
-       result = map_acpi_tables(&info, image->arch.pgd);
+       result = map_acpi_tables(&info, level4p);
        if (result)
                return result;
 
-       /*
-        * This must be last because the intermediate page table pages it
-        * allocates will not be control pages and may overlap the image.
-        */
-       return init_transition_pgtable(image, image->arch.pgd, control_page);
+       return init_transition_pgtable(image, level4p);
 }
 
 static void load_segments(void)
@@ -304,14 +299,14 @@ static void load_segments(void)
 
 int machine_kexec_prepare(struct kimage *image)
 {
-       unsigned long control_page;
+       unsigned long start_pgtable;
        int result;
 
        /* Calculate the offsets */
-       control_page = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+       start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
 
        /* Setup the identity mapped 64bit page table */
-       result = init_pgtable(image, control_page);
+       result = init_pgtable(image, start_pgtable);
        if (result)
                return result;
 
@@ -358,12 +353,13 @@ void machine_kexec(struct kimage *image)
 #endif
        }
 
-       control_page = page_address(image->control_code_page);
+       control_page = page_address(image->control_code_page) + PAGE_SIZE;
        __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
        page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
        page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
-       page_list[PA_TABLE_PAGE] = (unsigned long)__pa(image->arch.pgd);
+       page_list[PA_TABLE_PAGE] =
+         (unsigned long)__pa(page_address(image->control_code_page));
 
        if (image->type == KEXEC_TYPE_DEFAULT)
                page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
@@ -582,7 +578,8 @@ static void kexec_mark_crashkres(bool protect)
 
        /* Don't touch the control code page used in crash_kexec().*/
        control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
-       kexec_mark_range(crashk_res.start, control - 1, protect);
+       /* Control code page is located in the 2nd page. */
+       kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
        control += KEXEC_CONTROL_PAGE_SIZE;
        kexec_mark_range(control, crashk_res.end, protect);
 }