--- /dev/null
+From fdc6d38d64a20c542b1867ebeb8dd03b98829336 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Sat, 1 Oct 2022 19:09:24 +0200
+Subject: efi: memmap: Move manipulation routines into x86 arch tree
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit fdc6d38d64a20c542b1867ebeb8dd03b98829336 upstream.
+
+The EFI memory map is a description of the memory layout as provided by
+the firmware, and only x86 manipulates it in various ways for its own
+memory bookkeeping. So let's move the memmap routines that are only
+used by x86 into the x86 arch tree.
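+
+As a condensed view of the idiom this patch relies on: the
+self-referential define added to asm/efi.h marks the x86 override of
+__efi_memmap_free(), so the generic code can detect its absence with
+#ifndef and fall back to a no-op:
+
+    /* arch header (asm/efi.h): real implementation provided by x86 */
+    extern void __efi_memmap_free(u64 phys, unsigned long size,
+                                  unsigned long flags);
+    #define __efi_memmap_free __efi_memmap_free
+
+    /* generic code (drivers/firmware/efi/memmap.c): no-op fallback */
+    #ifndef __efi_memmap_free
+    #define __efi_memmap_free(phys, size, flags) do { } while (0)
+    #endif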
+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/efi.h | 12 ++
+ arch/x86/platform/efi/Makefile | 3
+ arch/x86/platform/efi/memmap.c | 236 ++++++++++++++++++++++++++++++++++++++++
+ drivers/firmware/efi/memmap.c | 240 +----------------------------------------
+ include/linux/efi.h | 10 -
+ 5 files changed, 261 insertions(+), 240 deletions(-)
+ create mode 100644 arch/x86/platform/efi/memmap.c
+
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -416,6 +416,18 @@ static inline void efi_fake_memmap_early
+ }
+ #endif
+
++extern int __init efi_memmap_alloc(unsigned int num_entries,
++ struct efi_memory_map_data *data);
++extern void __efi_memmap_free(u64 phys, unsigned long size,
++ unsigned long flags);
++#define __efi_memmap_free __efi_memmap_free
++
++extern int __init efi_memmap_install(struct efi_memory_map_data *data);
++extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
++ struct range *range);
++extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
++ void *buf, struct efi_mem_range *mem);
++
+ #define arch_ima_efi_boot_mode \
+ ({ extern struct boot_params boot_params; boot_params.secure_boot; })
+
+--- a/arch/x86/platform/efi/Makefile
++++ b/arch/x86/platform/efi/Makefile
+@@ -2,5 +2,6 @@
+ KASAN_SANITIZE := n
+ GCOV_PROFILE := n
+
+-obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
++obj-$(CONFIG_EFI) += memmap.o quirks.o efi.o efi_$(BITS).o \
++ efi_stub_$(BITS).o
+ obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
+--- /dev/null
++++ b/arch/x86/platform/efi/memmap.c
+@@ -0,0 +1,236 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Common EFI memory map functions.
++ */
++
++#define pr_fmt(fmt) "efi: " fmt
++
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/efi.h>
++#include <linux/io.h>
++#include <asm/early_ioremap.h>
++#include <asm/efi.h>
++#include <linux/memblock.h>
++#include <linux/slab.h>
++
++static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
++{
++ return memblock_phys_alloc(size, SMP_CACHE_BYTES);
++}
++
++static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
++{
++ unsigned int order = get_order(size);
++ struct page *p = alloc_pages(GFP_KERNEL, order);
++
++ if (!p)
++ return 0;
++
++ return PFN_PHYS(page_to_pfn(p));
++}
++
++void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
++{
++ if (flags & EFI_MEMMAP_MEMBLOCK) {
++ if (slab_is_available())
++ memblock_free_late(phys, size);
++ else
++ memblock_phys_free(phys, size);
++ } else if (flags & EFI_MEMMAP_SLAB) {
++ struct page *p = pfn_to_page(PHYS_PFN(phys));
++ unsigned int order = get_order(size);
++
++ free_pages((unsigned long) page_address(p), order);
++ }
++}
++
++/**
++ * efi_memmap_alloc - Allocate memory for the EFI memory map
++ * @num_entries: Number of entries in the allocated map.
++ * @data: efi memmap installation parameters
++ *
++ * Depending on whether mm_init() has already been invoked or not,
++ * either memblock or "normal" page allocation is used.
++ *
++ * Returns zero on success, a negative error code on failure.
++ */
++int __init efi_memmap_alloc(unsigned int num_entries,
++ struct efi_memory_map_data *data)
++{
++ /* Expect allocation parameters are zero initialized */
++ WARN_ON(data->phys_map || data->size);
++
++ data->size = num_entries * efi.memmap.desc_size;
++ data->desc_version = efi.memmap.desc_version;
++ data->desc_size = efi.memmap.desc_size;
++ data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK);
++ data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE;
++
++ if (slab_is_available()) {
++ data->flags |= EFI_MEMMAP_SLAB;
++ data->phys_map = __efi_memmap_alloc_late(data->size);
++ } else {
++ data->flags |= EFI_MEMMAP_MEMBLOCK;
++ data->phys_map = __efi_memmap_alloc_early(data->size);
++ }
++
++ if (!data->phys_map)
++ return -ENOMEM;
++ return 0;
++}
++
++/**
++ * efi_memmap_install - Install a new EFI memory map in efi.memmap
++ * @data: map allocation parameters (address, size, flags)
++ *
++ * Unlike efi_memmap_init_*(), this function does not allow the caller
++ * to switch from early to late mappings. It simply uses the existing
++ * mapping function and installs the new memmap.
++ *
++ * Returns zero on success, a negative error code on failure.
++ */
++int __init efi_memmap_install(struct efi_memory_map_data *data)
++{
++ efi_memmap_unmap();
++
++ return __efi_memmap_init(data);
++}
++
++/**
++ * efi_memmap_split_count - Count number of additional EFI memmap entries
++ * @md: EFI memory descriptor to split
++ * @range: Address range (start, end) to split around
++ *
++ * Returns the number of additional EFI memmap entries required to
++ * accommodate @range.
++ */
++int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
++{
++ u64 m_start, m_end;
++ u64 start, end;
++ int count = 0;
++
++ start = md->phys_addr;
++ end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
++
++ /* modifying range */
++ m_start = range->start;
++ m_end = range->end;
++
++ if (m_start <= start) {
++ /* split into 2 parts */
++ if (start < m_end && m_end < end)
++ count++;
++ }
++
++ if (start < m_start && m_start < end) {
++ /* split into 3 parts */
++ if (m_end < end)
++ count += 2;
++ /* split into 2 parts */
++ if (end <= m_end)
++ count++;
++ }
++
++ return count;
++}
++
++/**
++ * efi_memmap_insert - Insert a memory region in an EFI memmap
++ * @old_memmap: The existing EFI memory map structure
++ * @buf: Address of buffer to store new map
++ * @mem: Memory map entry to insert
++ *
++ * It is suggested that you call efi_memmap_split_count() first
++ * to see how large @buf needs to be.
++ */
++void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf,
++ struct efi_mem_range *mem)
++{
++ u64 m_start, m_end, m_attr;
++ efi_memory_desc_t *md;
++ u64 start, end;
++ void *old, *new;
++
++ /* modifying range */
++ m_start = mem->range.start;
++ m_end = mem->range.end;
++ m_attr = mem->attribute;
++
++ /*
++ * The EFI memory map deals with regions in EFI_PAGE_SIZE
++ * units. Ensure that the region described by 'mem' is aligned
++ * correctly.
++ */
++ if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) ||
++ !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) {
++ WARN_ON(1);
++ return;
++ }
++
++ for (old = old_memmap->map, new = buf;
++ old < old_memmap->map_end;
++ old += old_memmap->desc_size, new += old_memmap->desc_size) {
++
++ /* copy original EFI memory descriptor */
++ memcpy(new, old, old_memmap->desc_size);
++ md = new;
++ start = md->phys_addr;
++ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
++
++ if (m_start <= start && end <= m_end)
++ md->attribute |= m_attr;
++
++ if (m_start <= start &&
++ (start < m_end && m_end < end)) {
++ /* first part */
++ md->attribute |= m_attr;
++ md->num_pages = (m_end - md->phys_addr + 1) >>
++ EFI_PAGE_SHIFT;
++ /* latter part */
++ new += old_memmap->desc_size;
++ memcpy(new, old, old_memmap->desc_size);
++ md = new;
++ md->phys_addr = m_end + 1;
++ md->num_pages = (end - md->phys_addr + 1) >>
++ EFI_PAGE_SHIFT;
++ }
++
++ if ((start < m_start && m_start < end) && m_end < end) {
++ /* first part */
++ md->num_pages = (m_start - md->phys_addr) >>
++ EFI_PAGE_SHIFT;
++ /* middle part */
++ new += old_memmap->desc_size;
++ memcpy(new, old, old_memmap->desc_size);
++ md = new;
++ md->attribute |= m_attr;
++ md->phys_addr = m_start;
++ md->num_pages = (m_end - m_start + 1) >>
++ EFI_PAGE_SHIFT;
++ /* last part */
++ new += old_memmap->desc_size;
++ memcpy(new, old, old_memmap->desc_size);
++ md = new;
++ md->phys_addr = m_end + 1;
++ md->num_pages = (end - m_end) >>
++ EFI_PAGE_SHIFT;
++ }
++
++ if ((start < m_start && m_start < end) &&
++ (end <= m_end)) {
++ /* first part */
++ md->num_pages = (m_start - md->phys_addr) >>
++ EFI_PAGE_SHIFT;
++ /* latter part */
++ new += old_memmap->desc_size;
++ memcpy(new, old, old_memmap->desc_size);
++ md = new;
++ md->phys_addr = m_start;
++ md->num_pages = (end - md->phys_addr + 1) >>
++ EFI_PAGE_SHIFT;
++ md->attribute |= m_attr;
++ }
++ }
++}
+--- a/drivers/firmware/efi/memmap.c
++++ b/drivers/firmware/efi/memmap.c
+@@ -9,82 +9,15 @@
+ #include <linux/kernel.h>
+ #include <linux/efi.h>
+ #include <linux/io.h>
+-#include <asm/early_ioremap.h>
+ #include <linux/memblock.h>
+ #include <linux/slab.h>
+
+-static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
+-{
+- return memblock_phys_alloc(size, SMP_CACHE_BYTES);
+-}
+-
+-static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
+-{
+- unsigned int order = get_order(size);
+- struct page *p = alloc_pages(GFP_KERNEL, order);
+-
+- if (!p)
+- return 0;
+-
+- return PFN_PHYS(page_to_pfn(p));
+-}
+-
+-void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
+-{
+- if (flags & EFI_MEMMAP_MEMBLOCK) {
+- if (slab_is_available())
+- memblock_free_late(phys, size);
+- else
+- memblock_phys_free(phys, size);
+- } else if (flags & EFI_MEMMAP_SLAB) {
+- struct page *p = pfn_to_page(PHYS_PFN(phys));
+- unsigned int order = get_order(size);
+-
+- free_pages((unsigned long) page_address(p), order);
+- }
+-}
+-
+-static void __init efi_memmap_free(void)
+-{
+- __efi_memmap_free(efi.memmap.phys_map,
+- efi.memmap.desc_size * efi.memmap.nr_map,
+- efi.memmap.flags);
+-}
+-
+-/**
+- * efi_memmap_alloc - Allocate memory for the EFI memory map
+- * @num_entries: Number of entries in the allocated map.
+- * @data: efi memmap installation parameters
+- *
+- * Depending on whether mm_init() has already been invoked or not,
+- * either memblock or "normal" page allocation is used.
+- *
+- * Returns zero on success, a negative error code on failure.
+- */
+-int __init efi_memmap_alloc(unsigned int num_entries,
+- struct efi_memory_map_data *data)
+-{
+- /* Expect allocation parameters are zero initialized */
+- WARN_ON(data->phys_map || data->size);
+-
+- data->size = num_entries * efi.memmap.desc_size;
+- data->desc_version = efi.memmap.desc_version;
+- data->desc_size = efi.memmap.desc_size;
+- data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK);
+- data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE;
+-
+- if (slab_is_available()) {
+- data->flags |= EFI_MEMMAP_SLAB;
+- data->phys_map = __efi_memmap_alloc_late(data->size);
+- } else {
+- data->flags |= EFI_MEMMAP_MEMBLOCK;
+- data->phys_map = __efi_memmap_alloc_early(data->size);
+- }
++#include <asm/early_ioremap.h>
++#include <asm/efi.h>
+
+- if (!data->phys_map)
+- return -ENOMEM;
+- return 0;
+-}
++#ifndef __efi_memmap_free
++#define __efi_memmap_free(phys, size, flags) do { } while (0)
++#endif
+
+ /**
+ * __efi_memmap_init - Common code for mapping the EFI memory map
+@@ -101,7 +34,7 @@ int __init efi_memmap_alloc(unsigned int
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+-static int __init __efi_memmap_init(struct efi_memory_map_data *data)
++int __init __efi_memmap_init(struct efi_memory_map_data *data)
+ {
+ struct efi_memory_map map;
+ phys_addr_t phys_map;
+@@ -121,8 +54,10 @@ static int __init __efi_memmap_init(stru
+ return -ENOMEM;
+ }
+
+- /* NOP if data->flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB) == 0 */
+- efi_memmap_free();
++ if (efi.memmap.flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB))
++ __efi_memmap_free(efi.memmap.phys_map,
++ efi.memmap.desc_size * efi.memmap.nr_map,
++ efi.memmap.flags);
+
+ map.phys_map = data->phys_map;
+ map.nr_map = data->size / data->desc_size;
+@@ -220,158 +155,3 @@ int __init efi_memmap_init_late(phys_add
+
+ return __efi_memmap_init(&data);
+ }
+-
+-/**
+- * efi_memmap_install - Install a new EFI memory map in efi.memmap
+- * @ctx: map allocation parameters (address, size, flags)
+- *
+- * Unlike efi_memmap_init_*(), this function does not allow the caller
+- * to switch from early to late mappings. It simply uses the existing
+- * mapping function and installs the new memmap.
+- *
+- * Returns zero on success, a negative error code on failure.
+- */
+-int __init efi_memmap_install(struct efi_memory_map_data *data)
+-{
+- efi_memmap_unmap();
+-
+- return __efi_memmap_init(data);
+-}
+-
+-/**
+- * efi_memmap_split_count - Count number of additional EFI memmap entries
+- * @md: EFI memory descriptor to split
+- * @range: Address range (start, end) to split around
+- *
+- * Returns the number of additional EFI memmap entries required to
+- * accommodate @range.
+- */
+-int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
+-{
+- u64 m_start, m_end;
+- u64 start, end;
+- int count = 0;
+-
+- start = md->phys_addr;
+- end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+-
+- /* modifying range */
+- m_start = range->start;
+- m_end = range->end;
+-
+- if (m_start <= start) {
+- /* split into 2 parts */
+- if (start < m_end && m_end < end)
+- count++;
+- }
+-
+- if (start < m_start && m_start < end) {
+- /* split into 3 parts */
+- if (m_end < end)
+- count += 2;
+- /* split into 2 parts */
+- if (end <= m_end)
+- count++;
+- }
+-
+- return count;
+-}
+-
+-/**
+- * efi_memmap_insert - Insert a memory region in an EFI memmap
+- * @old_memmap: The existing EFI memory map structure
+- * @buf: Address of buffer to store new map
+- * @mem: Memory map entry to insert
+- *
+- * It is suggested that you call efi_memmap_split_count() first
+- * to see how large @buf needs to be.
+- */
+-void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf,
+- struct efi_mem_range *mem)
+-{
+- u64 m_start, m_end, m_attr;
+- efi_memory_desc_t *md;
+- u64 start, end;
+- void *old, *new;
+-
+- /* modifying range */
+- m_start = mem->range.start;
+- m_end = mem->range.end;
+- m_attr = mem->attribute;
+-
+- /*
+- * The EFI memory map deals with regions in EFI_PAGE_SIZE
+- * units. Ensure that the region described by 'mem' is aligned
+- * correctly.
+- */
+- if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) ||
+- !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) {
+- WARN_ON(1);
+- return;
+- }
+-
+- for (old = old_memmap->map, new = buf;
+- old < old_memmap->map_end;
+- old += old_memmap->desc_size, new += old_memmap->desc_size) {
+-
+- /* copy original EFI memory descriptor */
+- memcpy(new, old, old_memmap->desc_size);
+- md = new;
+- start = md->phys_addr;
+- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+-
+- if (m_start <= start && end <= m_end)
+- md->attribute |= m_attr;
+-
+- if (m_start <= start &&
+- (start < m_end && m_end < end)) {
+- /* first part */
+- md->attribute |= m_attr;
+- md->num_pages = (m_end - md->phys_addr + 1) >>
+- EFI_PAGE_SHIFT;
+- /* latter part */
+- new += old_memmap->desc_size;
+- memcpy(new, old, old_memmap->desc_size);
+- md = new;
+- md->phys_addr = m_end + 1;
+- md->num_pages = (end - md->phys_addr + 1) >>
+- EFI_PAGE_SHIFT;
+- }
+-
+- if ((start < m_start && m_start < end) && m_end < end) {
+- /* first part */
+- md->num_pages = (m_start - md->phys_addr) >>
+- EFI_PAGE_SHIFT;
+- /* middle part */
+- new += old_memmap->desc_size;
+- memcpy(new, old, old_memmap->desc_size);
+- md = new;
+- md->attribute |= m_attr;
+- md->phys_addr = m_start;
+- md->num_pages = (m_end - m_start + 1) >>
+- EFI_PAGE_SHIFT;
+- /* last part */
+- new += old_memmap->desc_size;
+- memcpy(new, old, old_memmap->desc_size);
+- md = new;
+- md->phys_addr = m_end + 1;
+- md->num_pages = (end - m_end) >>
+- EFI_PAGE_SHIFT;
+- }
+-
+- if ((start < m_start && m_start < end) &&
+- (end <= m_end)) {
+- /* first part */
+- md->num_pages = (m_start - md->phys_addr) >>
+- EFI_PAGE_SHIFT;
+- /* latter part */
+- new += old_memmap->desc_size;
+- memcpy(new, old, old_memmap->desc_size);
+- md = new;
+- md->phys_addr = m_start;
+- md->num_pages = (end - md->phys_addr + 1) >>
+- EFI_PAGE_SHIFT;
+- md->attribute |= m_attr;
+- }
+- }
+-}
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -709,18 +709,10 @@ static inline efi_status_t efi_query_var
+ #endif
+ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+
+-extern int __init efi_memmap_alloc(unsigned int num_entries,
+- struct efi_memory_map_data *data);
+-extern void __efi_memmap_free(u64 phys, unsigned long size,
+- unsigned long flags);
++extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
+ extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
+ extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
+ extern void __init efi_memmap_unmap(void);
+-extern int __init efi_memmap_install(struct efi_memory_map_data *data);
+-extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
+- struct range *range);
+-extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
+- void *buf, struct efi_mem_range *mem);
+
+ #ifdef CONFIG_EFI_ESRT
+ extern void __init efi_esrt_init(void);
--- /dev/null
+From 75dde792d6f6c2d0af50278bd374bf0c512fe196 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Mon, 10 Jun 2024 16:02:13 +0200
+Subject: efi/x86: Free EFI memory map only when installing a new one.
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 75dde792d6f6c2d0af50278bd374bf0c512fe196 upstream.
+
+The logic in __efi_memmap_init() is shared between two different
+execution flows:
+- mapping the EFI memory map early or late into the kernel VA space, so
+ that its entries can be accessed;
+- the x86 specific cloning of the EFI memory map in order to insert new
+ entries that are created as a result of making a memory reservation
+ via a call to efi_mem_reserve().
+
+In the former case, the underlying memory containing the kernel's view
+of the EFI memory map (which may be heavily modified by the kernel
+itself on x86) is not modified at all, and the only thing that changes
+is the virtual mapping of this memory, which is different between early
+and late boot.
+
+In the latter case, an entirely new allocation is created that carries a
+new, updated version of the kernel's view of the EFI memory map. When
+installing this new version, the old version will no longer be
+referenced, and if the memory was allocated by the kernel, it will leak
+unless it gets freed.
+
+The logic that implements this freeing currently lives on the code path
+that is shared between these two use cases, but it should only apply to
+the latter. So move it to the correct spot.
+
+While at it, drop the dummy definition for non-x86 architectures, as
+that is no longer needed.
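+
+In condensed form, the corrected ordering in efi_memmap_install() is:
+
+    unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map;
+    unsigned long flags = efi.memmap.flags;
+    u64 phys = efi.memmap.phys_map;
+    int ret;
+
+    efi_memmap_unmap();
+
+    if (efi_enabled(EFI_PARAVIRT))
+        return 0;
+
+    ret = __efi_memmap_init(data);      /* install the new map */
+    if (ret)
+        return ret;                     /* on failure, keep the old map */
+
+    __efi_memmap_free(phys, size, flags);   /* only now free the old one */
+    return 0;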
+
+Cc: <stable@vger.kernel.org>
+Fixes: f0ef6523475f ("efi: Fix efi_memmap_alloc() leaks")
+Tested-by: Ashish Kalra <Ashish.Kalra@amd.com>
+Link: https://lore.kernel.org/all/36ad5079-4326-45ed-85f6-928ff76483d3@amd.com
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/efi.h | 1 -
+ arch/x86/platform/efi/memmap.c | 12 +++++++++++-
+ drivers/firmware/efi/memmap.c | 9 ---------
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -420,7 +420,6 @@ extern int __init efi_memmap_alloc(unsig
+ struct efi_memory_map_data *data);
+ extern void __efi_memmap_free(u64 phys, unsigned long size,
+ unsigned long flags);
+-#define __efi_memmap_free __efi_memmap_free
+
+ extern int __init efi_memmap_install(struct efi_memory_map_data *data);
+ extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
+--- a/arch/x86/platform/efi/memmap.c
++++ b/arch/x86/platform/efi/memmap.c
+@@ -92,12 +92,22 @@ int __init efi_memmap_alloc(unsigned int
+ */
+ int __init efi_memmap_install(struct efi_memory_map_data *data)
+ {
++ unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map;
++ unsigned long flags = efi.memmap.flags;
++ u64 phys = efi.memmap.phys_map;
++ int ret;
++
+ efi_memmap_unmap();
+
+ if (efi_enabled(EFI_PARAVIRT))
+ return 0;
+
+- return __efi_memmap_init(data);
++ ret = __efi_memmap_init(data);
++ if (ret)
++ return ret;
++
++ __efi_memmap_free(phys, size, flags);
++ return 0;
+ }
+
+ /**
+--- a/drivers/firmware/efi/memmap.c
++++ b/drivers/firmware/efi/memmap.c
+@@ -15,10 +15,6 @@
+ #include <asm/early_ioremap.h>
+ #include <asm/efi.h>
+
+-#ifndef __efi_memmap_free
+-#define __efi_memmap_free(phys, size, flags) do { } while (0)
+-#endif
+-
+ /**
+ * __efi_memmap_init - Common code for mapping the EFI memory map
+ * @data: EFI memory map data
+@@ -51,11 +47,6 @@ int __init __efi_memmap_init(struct efi_
+ return -ENOMEM;
+ }
+
+- if (efi.memmap.flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB))
+- __efi_memmap_free(efi.memmap.phys_map,
+- efi.memmap.desc_size * efi.memmap.nr_map,
+- efi.memmap.flags);
+-
+ map.phys_map = data->phys_map;
+ map.nr_map = data->size / data->desc_size;
+ map.map_end = map.map + data->size;
--- /dev/null
+From bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 Mon Sep 17 00:00:00 2001
+From: yangge <yangge1116@126.com>
+Date: Thu, 20 Jun 2024 08:59:50 +0800
+Subject: mm/page_alloc: Separate THP PCP into movable and non-movable categories
+
+From: yangge <yangge1116@126.com>
+
+commit bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 upstream.
+
+Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for
+THP-sized allocations"), the migration type of pages in the THP-sized
+PCP list is no longer differentiated, so non-movable allocation
+requests may get a CMA page from the list; in some cases that is not
+acceptable.
+
+If a large amount of CMA memory is configured in the system (for
+example, CMA memory accounts for 50% of system memory), starting a
+virtual machine with device passthrough will get stuck. While the
+virtual machine is starting, it calls pin_user_pages_remote(...,
+FOLL_LONGTERM, ...) to pin memory. Normally, if a page is present and
+in a CMA area, pin_user_pages_remote() will migrate the page out of
+the CMA area because of the FOLL_LONGTERM flag. But if a non-movable
+allocation request returns CMA memory, migrate_longterm_unpinnable_pages()
+will migrate a CMA page to another CMA page, which fails the check in
+check_and_migrate_movable_pages() and causes endless migration.
+
+Call trace:
+pin_user_pages_remote
+--__gup_longterm_locked // endless loops in this function
+----_get_user_pages_locked
+----check_and_migrate_movable_pages
+------migrate_longterm_unpinnable_pages
+--------alloc_migration_target
+
+This problem also has a negative impact on CMA itself. For example,
+when CMA is borrowed by THP and we need to reclaim it through
+cma_alloc() or dma_alloc_coherent(), we must move those pages out to
+ensure CMA's users can retrieve that contiguous memory. If CMA memory
+is occupied by non-movable pages, we cannot relocate them, and as a
+result cma_alloc() is more likely to fail.
+
+To fix the problem above, we add one more PCP list for THP; this does
+not introduce a new cacheline in struct per_cpu_pages. THP then has two
+PCP lists: one used by movable allocations (MIGRATE_MOVABLE, i.e.
+requests with __GFP_MOVABLE set) and the other used by unmovable and
+reclaimable allocations (MIGRATE_UNMOVABLE and MIGRATE_RECLAIMABLE).
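+
+As a worked example of the new indexing (assuming the usual values
+MIGRATE_PCPTYPES == 3 and PAGE_ALLOC_COSTLY_ORDER == 3, so
+NR_LOWORDER_PCP_LISTS == 3 * (3 + 1) == 12):
+
+    order_to_pindex(MIGRATE_UNMOVABLE,   pageblock_order) == 12
+    order_to_pindex(MIGRATE_RECLAIMABLE, pageblock_order) == 12
+    order_to_pindex(MIGRATE_MOVABLE,     pageblock_order) == 13
+
+and pindex_to_order() accordingly treats any pindex >=
+NR_LOWORDER_PCP_LISTS as pageblock_order.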
+
+Link: https://lkml.kernel.org/r/1718845190-4456-1-git-send-email-yangge1116@126.com
+Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations")
+Signed-off-by: yangge <yangge1116@126.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mmzone.h | 9 ++++-----
+ mm/page_alloc.c | 8 ++++++--
+ 2 files changed, 10 insertions(+), 7 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -552,13 +552,12 @@ enum zone_watermarks {
+ };
+
+ /*
+- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list
+- * for THP which will usually be GFP_MOVABLE. Even if it is another type,
+- * it should not contribute to serious fragmentation causing THP allocation
+- * failures.
++ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists
++ * are added for THP: one is used by movable allocations and the other by
++ * unmovable and reclaimable allocations.
+ */
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-#define NR_PCP_THP 1
++#define NR_PCP_THP 2
+ #else
+ #define NR_PCP_THP 0
+ #endif
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -705,12 +705,16 @@ out:
+
+ static inline unsigned int order_to_pindex(int migratetype, int order)
+ {
++ bool __maybe_unused movable;
+ int base = order;
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (order > PAGE_ALLOC_COSTLY_ORDER) {
+ VM_BUG_ON(order != pageblock_order);
+- return NR_LOWORDER_PCP_LISTS;
++
++ movable = migratetype == MIGRATE_MOVABLE;
++
++ return NR_LOWORDER_PCP_LISTS + movable;
+ }
+ #else
+ VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
+@@ -724,7 +728,7 @@ static inline int pindex_to_order(unsign
+ int order = pindex / MIGRATE_PCPTYPES;
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- if (pindex == NR_LOWORDER_PCP_LISTS)
++ if (pindex >= NR_LOWORDER_PCP_LISTS)
+ order = pageblock_order;
+ #else
+ VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);