From e62dbf4893082576ca8b00b454a05cd46a23766b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 Jun 2024 15:12:47 +0200
Subject: [PATCH] 6.1-stable patches

added patches:
	efi-libstub-only-free-priv.runtime_map-when-allocated.patch
	genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
	kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
	x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
---
 ...free-priv.runtime_map-when-allocated.patch |  44 +++++++
 ...event-vector-leak-during-cpu-offline.patch | 123 ++++++++++++++++++
 ...xphyaddr-as-host.maxphyaddr-in-cpuid.patch |  74 +++++++++++
 queue-6.1/series                              |   4 +
 ...kaslr-when-memory-reservations-exist.patch | 107 +++++++++++++++
 5 files changed, 352 insertions(+)
 create mode 100644 queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
 create mode 100644 queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
 create mode 100644 queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
 create mode 100644 queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch

diff --git a/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch b/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
new file mode 100644
index 00000000000..6cba49e95ce
--- /dev/null
+++ b/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
@@ -0,0 +1,44 @@
+From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001
+From: Hagar Hemdan <hagarhem@amazon.com>
+Date: Tue, 23 Apr 2024 13:59:26 +0000
+Subject: efi: libstub: only free priv.runtime_map when allocated
+
+From: Hagar Hemdan <hagarhem@amazon.com>
+
+commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream.
+
+priv.runtime_map is only allocated when efi_novamap is not set.
+Otherwise, it is an uninitialized value.  In the error path, it is freed
+unconditionally.  Avoid passing an uninitialized value to free_pool.
+Free priv.runtime_map only when it was allocated.
+
+This bug was discovered and resolved using Coverity Static Analysis
+Security Testing (SAST) by Synopsys, Inc.
+
+Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/fdt.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
+index 70e9789ff9de..6a337f1f8787 100644
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -335,8 +335,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
+ 
+ fail:
+ 	efi_free(fdt_size, fdt_addr);
+-
+-	efi_bs_call(free_pool, priv.runtime_map);
++	if (!efi_novamap)
++		efi_bs_call(free_pool, priv.runtime_map);
+ 
+ 	return EFI_LOAD_ERROR;
+ }
+-- 
+2.45.2
+
diff --git a/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
new file mode 100644
index 00000000000..5a8ef5f22ff
--- /dev/null
+++ b/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
@@ -0,0 +1,123 @@
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of
+interrupt affinity reconfiguration via procfs. Instead, the change is
+deferred until the next instance of the interrupt being triggered on the
+original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the
+interrupt is currently or used to be affine to the outgoing CPU.
+
+Additionally, reclaim the vector in __vector_schedule_cleanup() as well,
+following a warning message, although theoretically it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c |    9 ++++++---
+ kernel/irq/cpuhotplug.c       |   16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -982,7 +982,8 @@ static void __send_cleanup_vector(struct
+ 		hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
+ 		apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+ 	} else {
+-		apicd->prev_vector = 0;
++		pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++		free_moved_vector(apicd);
+ 	}
+ 	raw_spin_unlock(&vector_lock);
+ }
+@@ -1019,6 +1020,7 @@ void irq_complete_move(struct irq_cfg *c
+  */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++	unsigned int cpu = smp_processor_id();
+ 	struct apic_chip_data *apicd;
+ 	struct irq_data *irqd;
+ 	unsigned int vector;
+@@ -1043,10 +1045,11 @@ void irq_force_complete_move(struct irq_
+ 		goto unlock;
+ 
+ 	/*
+-	 * If prev_vector is empty, no action required.
++	 * If prev_vector is empty or the descriptor is neither currently
++	 * nor previously on the outgoing CPU no action required.
+ 	 */
+ 	vector = apicd->prev_vector;
+-	if (!vector)
++	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ 		goto unlock;
+ 
+ 	/*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+ 	}
+ 
+ 	/*
++	 * Complete an eventually pending irq move cleanup. If this
++	 * interrupt was moved in hard irq context, then the vectors need
++	 * to be cleaned up. It can't wait until this interrupt actually
++	 * happens and this CPU was involved.
++	 */
++	irq_force_complete_move(desc);
++
++	/*
+ 	 * No move required, if:
+ 	 * - Interrupt is per cpu
+ 	 * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+ 	}
+ 
+ 	/*
+-	 * Complete an eventually pending irq move cleanup. If this
+-	 * interrupt was moved in hard irq context, then the vectors need
+-	 * to be cleaned up. It can't wait until this interrupt actually
+-	 * happens and this CPU was involved.
+-	 */
+-	irq_force_complete_move(desc);
+-
+-	/*
+ 	 * If there is a setaffinity pending, then try to reuse the pending
+ 	 * mask, so the last change of the affinity does not get lost. If
+ 	 * there is no move pending or the pending mask does not contain
diff --git a/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
new file mode 100644
index 00000000000..39d8bdc0435
--- /dev/null
+++ b/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
@@ -0,0 +1,74 @@
+From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Wed, 13 Mar 2024 13:58:42 +0100
+Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID
+
+From: Gerd Hoffmann <kraxel@redhat.com>
+
+commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream.
+
+Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16])
+to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths
+to userspace via KVM_GET_SUPPORTED_CPUID.
+
+Per AMD, GuestPhysBits is intended for software use, and physical CPUs do
+not set that field.  I.e. GuestPhysBits will be non-zero if and only if
+KVM is running as a nested hypervisor, and in that case, GuestPhysBits is
+NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with
+TDP enabled.
+
+E.g. KVM will soon use GuestPhysBits to communicate the CPU's maximum
+*addressable* guest physical address, which would result in KVM under-
+reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52,
+but without 5-level paging.
+
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com
+[sean: rewrite changelog with --verbose, Cc stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -1157,9 +1157,8 @@ static inline int __do_cpuid_func(struct
+ 		entry->eax = entry->ebx = entry->ecx = 0;
+ 		break;
+ 	case 0x80000008: {
+-		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+-		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+-		unsigned phys_as = entry->eax & 0xff;
++		unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
++		unsigned int phys_as;
+ 
+ 		/*
+ 		 * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+@@ -1167,16 +1166,16 @@ static inline int __do_cpuid_func(struct
+ 		 * reductions in MAXPHYADDR for memory encryption affect shadow
+ 		 * paging, too.
+ 		 *
+-		 * If TDP is enabled but an explicit guest MAXPHYADDR is not
+-		 * provided, use the raw bare metal MAXPHYADDR as reductions to
+-		 * the HPAs do not affect GPAs.
++		 * If TDP is enabled, use the raw bare metal MAXPHYADDR as
++		 * reductions to the HPAs do not affect GPAs.
+ 		 */
+-		if (!tdp_enabled)
+-			g_phys_as = boot_cpu_data.x86_phys_bits;
+-		else if (!g_phys_as)
+-			g_phys_as = phys_as;
++		if (!tdp_enabled) {
++			phys_as = boot_cpu_data.x86_phys_bits;
++		} else {
++			phys_as = entry->eax & 0xff;
++		}
+ 
+-		entry->eax = g_phys_as | (virt_as << 8);
++		entry->eax = phys_as | (virt_as << 8);
+ 		entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+ 		entry->edx = 0;
+ 		cpuid_entry_override(entry, CPUID_8000_0008_EBX);
diff --git a/queue-6.1/series b/queue-6.1/series
index 63bb48a57f0..81204eab3d8 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -44,6 +44,10 @@ regulator-vqmmc-ipq4019-fix-module-autoloading.patch
 asoc-rt715-add-vendor-clear-control-register.patch
 asoc-rt715-sdca-volume-step-modification.patch
 kvm-selftests-add-test-for-uaccesses-to-non-existent.patch
+x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
+efi-libstub-only-free-priv.runtime_map-when-allocated.patch
+kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
+genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
 fpga-dfl-pci-add-pci-subdevice-id-for-intel-d5005-ca.patch
 softirq-fix-suspicious-rcu-usage-in-__do_softirq.patch
 asoc-da7219-aad-fix-usage-of-device_get_named_child_.patch
diff --git a/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch b/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
new file mode 100644
index 00000000000..8d799288e86
--- /dev/null
+++ b/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
@@ -0,0 +1,107 @@
+From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 16 May 2024 11:05:42 +0200
+Subject: x86/efistub: Omit physical KASLR when memory reservations exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream.
+
+The legacy decompressor has elaborate logic to ensure that the
+randomized physical placement of the decompressed kernel image does not
+conflict with any memory reservations, including ones specified on the
+command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are
+taken into account by the kernel proper at a later stage.
+
+When booting in EFI mode, it is the firmware's job to ensure that the
+chosen range does not conflict with any memory reservations that it
+knows about, and this is trivially achieved by using the firmware's
+memory allocation APIs.
+
+That leaves reservations specified on the command line, though, which
+the firmware knows nothing about, as these regions have no other special
+significance to the platform. Since commit
+
+  a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+
+these reservations are not taken into account when randomizing the
+physical placement, which may result in conflicts where the memory
+cannot be reserved by the kernel proper because its own executable image
+resides there.
+
+To avoid having to duplicate or reuse the existing complicated logic,
+disable physical KASLR entirely when such overrides are specified. These
+are mostly diagnostic tools or niche features, and physical KASLR (as
+opposed to virtual KASLR, which is much more important as it affects the
+memory addresses observed by code executing in the kernel) is something
+we can live without.
+
+Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com
+Reported-by: Ben Chaney <bchaney@akamai.com>
+Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+Cc:  <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/x86-stub.c |   28 ++++++++++++++++++++++++++--
+ 1 file changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -736,6 +736,26 @@ static void error(char *str)
+ 	efi_warn("Decompression failed: %s\n", str);
+ }
+ 
++static const char *cmdline_memmap_override;
++
++static efi_status_t parse_options(const char *cmdline)
++{
++	static const char opts[][14] = {
++		"mem=", "memmap=", "efi_fake_mem=", "hugepages="
++	};
++
++	for (int i = 0; i < ARRAY_SIZE(opts); i++) {
++		const char *p = strstr(cmdline, opts[i]);
++
++		if (p == cmdline || (p > cmdline && isspace(p[-1]))) {
++			cmdline_memmap_override = opts[i];
++			break;
++		}
++	}
++
++	return efi_parse_options(cmdline);
++}
++
+ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+ {
+ 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+@@ -767,6 +787,10 @@ static efi_status_t efi_decompress_kerne
+ 		    !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
+ 			efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
+ 			seed[0] = 0;
++		} else if (cmdline_memmap_override) {
++			efi_info("%s detected on the kernel command line - disabling physical KASLR\n",
++				 cmdline_memmap_override);
++			seed[0] = 0;
+ 		}
+ 
+ 		boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
+@@ -843,7 +867,7 @@ void __noreturn efi_stub_entry(efi_handl
+ 	}
+ 
+ #ifdef CONFIG_CMDLINE_BOOL
+-	status = efi_parse_options(CONFIG_CMDLINE);
++	status = parse_options(CONFIG_CMDLINE);
+ 	if (status != EFI_SUCCESS) {
+ 		efi_err("Failed to parse options\n");
+ 		goto fail;
+@@ -852,7 +876,7 @@ void __noreturn efi_stub_entry(efi_handl
+ 	if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ 		unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ 					       ((u64)boot_params->ext_cmd_line_ptr << 32));
+-		status = efi_parse_options((char *)cmdline_paddr);
++		status = parse_options((char *)cmdline_paddr);
+ 		if (status != EFI_SUCCESS) {
+ 			efi_err("Failed to parse options\n");
+ 			goto fail;
-- 
2.47.3