From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 Jun 2024 13:14:03 +0000 (+0200)
Subject: 6.9-stable patches
X-Git-Tag: v6.1.93~20
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a43b832ba21f38a0a059ae2363bd68b432c46123;p=thirdparty%2Fkernel%2Fstable-queue.git

6.9-stable patches

added patches:
	efi-libstub-only-free-priv.runtime_map-when-allocated.patch
	genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
	kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
	platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
	platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
	x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
	x86-pci-skip-early-e820-check-for-ecam-region.patch
	x86-topology-handle-bogus-acpi-tables-correctly.patch
	x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch
---

diff --git a/queue-6.9/efi-libstub-only-free-priv.runtime_map-when-allocated.patch b/queue-6.9/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
new file mode 100644
index 00000000000..96f85f9573b
--- /dev/null
+++ b/queue-6.9/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
@@ -0,0 +1,39 @@
+From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001
+From: Hagar Hemdan <hagarhem@amazon.com>
+Date: Tue, 23 Apr 2024 13:59:26 +0000
+Subject: efi: libstub: only free priv.runtime_map when allocated
+
+From: Hagar Hemdan <hagarhem@amazon.com>
+
+commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream.
+
+priv.runtime_map is only allocated when efi_novamap is not set.
+Otherwise, it is an uninitialized value.  In the error path, it is freed
+unconditionally.  Avoid passing an uninitialized value to free_pool.
+Free priv.runtime_map only when it was allocated.
+
+This bug was discovered and resolved using Coverity Static Analysis
+Security Testing (SAST) by Synopsys, Inc.
+
+Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/fdt.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -335,8 +335,8 @@ fail_free_new_fdt:
+ 
+ fail:
+ 	efi_free(fdt_size, fdt_addr);
+-
+-	efi_bs_call(free_pool, priv.runtime_map);
++	if (!efi_novamap)
++		efi_bs_call(free_pool, priv.runtime_map);
+ 
+ 	return EFI_LOAD_ERROR;
+ }
diff --git a/queue-6.9/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-6.9/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
new file mode 100644
index 00000000000..c6c9cd45639
--- /dev/null
+++ b/queue-6.9/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
@@ -0,0 +1,123 @@
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of
+interrupt affinity reconfiguration via procfs. Instead, the change is
+deferred until the next instance of the interrupt being triggered on the
+original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the
+interrupt is currently or used to be affine to the outgoing CPU.
+
+Additionally, reclaim the vector in __vector_schedule_cleanup() as well,
+following a warning message, although theoretically it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c |    9 ++++++---
+ kernel/irq/cpuhotplug.c       |   16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -1036,7 +1036,8 @@ static void __vector_schedule_cleanup(st
+ 			add_timer_on(&cl->timer, cpu);
+ 		}
+ 	} else {
+-		apicd->prev_vector = 0;
++		pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++		free_moved_vector(apicd);
+ 	}
+ 	raw_spin_unlock(&vector_lock);
+ }
+@@ -1073,6 +1074,7 @@ void irq_complete_move(struct irq_cfg *c
+  */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++	unsigned int cpu = smp_processor_id();
+ 	struct apic_chip_data *apicd;
+ 	struct irq_data *irqd;
+ 	unsigned int vector;
+@@ -1097,10 +1099,11 @@ void irq_force_complete_move(struct irq_
+ 		goto unlock;
+ 
+ 	/*
+-	 * If prev_vector is empty, no action required.
++	 * If prev_vector is empty or the descriptor is neither currently
++	 * nor previously on the outgoing CPU no action required.
+ 	 */
+ 	vector = apicd->prev_vector;
+-	if (!vector)
++	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ 		goto unlock;
+ 
+ 	/*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+ 	}
+ 
+ 	/*
++	 * Complete an eventually pending irq move cleanup. If this
++	 * interrupt was moved in hard irq context, then the vectors need
++	 * to be cleaned up. It can't wait until this interrupt actually
++	 * happens and this CPU was involved.
++	 */
++	irq_force_complete_move(desc);
++
++	/*
+ 	 * No move required, if:
+ 	 * - Interrupt is per cpu
+ 	 * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+ 	}
+ 
+ 	/*
+-	 * Complete an eventually pending irq move cleanup. If this
+-	 * interrupt was moved in hard irq context, then the vectors need
+-	 * to be cleaned up. It can't wait until this interrupt actually
+-	 * happens and this CPU was involved.
+-	 */
+-	irq_force_complete_move(desc);
+-
+-	/*
+ 	 * If there is a setaffinity pending, then try to reuse the pending
+ 	 * mask, so the last change of the affinity does not get lost. If
+ 	 * there is no move pending or the pending mask does not contain
diff --git a/queue-6.9/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-6.9/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
new file mode 100644
index 00000000000..5294510e073
--- /dev/null
+++ b/queue-6.9/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
@@ -0,0 +1,74 @@
+From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Wed, 13 Mar 2024 13:58:42 +0100
+Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID
+
+From: Gerd Hoffmann <kraxel@redhat.com>
+
+commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream.
+
+Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16])
+to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths
+to userspace via KVM_GET_SUPPORTED_CPUID.
+
+Per AMD, GuestPhysBits is intended for software use, and physical CPUs do
+not set that field.  I.e. GuestPhysBits will be non-zero if and only if
+KVM is running as a nested hypervisor, and in that case, GuestPhysBits is
+NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with
+TDP enabled.
+
+E.g. KVM will soon use GuestPhysBits to communicate the CPU's maximum
+*addressable* guest physical address, which would result in KVM under-
+reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52,
+but without 5-level paging.
+
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com
+[sean: rewrite changelog with --verbose, Cc stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -1232,9 +1232,8 @@ static inline int __do_cpuid_func(struct
+ 		entry->eax = entry->ebx = entry->ecx = 0;
+ 		break;
+ 	case 0x80000008: {
+-		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+-		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+-		unsigned phys_as = entry->eax & 0xff;
++		unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
++		unsigned int phys_as;
+ 
+ 		/*
+ 		 * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+@@ -1242,16 +1241,16 @@ static inline int __do_cpuid_func(struct
+ 		 * reductions in MAXPHYADDR for memory encryption affect shadow
+ 		 * paging, too.
+ 		 *
+-		 * If TDP is enabled but an explicit guest MAXPHYADDR is not
+-		 * provided, use the raw bare metal MAXPHYADDR as reductions to
+-		 * the HPAs do not affect GPAs.
++		 * If TDP is enabled, use the raw bare metal MAXPHYADDR as
++		 * reductions to the HPAs do not affect GPAs.
+ 		 */
+-		if (!tdp_enabled)
+-			g_phys_as = boot_cpu_data.x86_phys_bits;
+-		else if (!g_phys_as)
+-			g_phys_as = phys_as;
++		if (!tdp_enabled) {
++			phys_as = boot_cpu_data.x86_phys_bits;
++		} else {
++			phys_as = entry->eax & 0xff;
++		}
+ 
+-		entry->eax = g_phys_as | (virt_as << 8);
++		entry->eax = phys_as | (virt_as << 8);
+ 		entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+ 		entry->edx = 0;
+ 		cpuid_entry_override(entry, CPUID_8000_0008_EBX);
diff --git a/queue-6.9/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch b/queue-6.9/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
new file mode 100644
index 00000000000..6f4d9ba4dbb
--- /dev/null
+++ b/queue-6.9/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
@@ -0,0 +1,42 @@
+From 2920141fc149f71bad22361946417bc43783ed7f Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Tue, 23 Apr 2024 13:46:10 -0700
+Subject: platform/x86/intel/tpmi: Handle error from tpmi_process_info()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 2920141fc149f71bad22361946417bc43783ed7f upstream.
+
+When tpmi_process_info() returns error, fail to load the driver.
+This can happen if call to ioremap() returns error.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.3+
+Link: https://lore.kernel.org/r/20240423204619.3946901-2-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/tpmi.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/platform/x86/intel/tpmi.c
++++ b/drivers/platform/x86/intel/tpmi.c
+@@ -763,8 +763,11 @@ static int intel_vsec_tpmi_init(struct a
+ 		 * when actual device nodes created outside this
+ 		 * loop via tpmi_create_devices().
+ 		 */
+-		if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID)
+-			tpmi_process_info(tpmi_info, pfs);
++		if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID) {
++			ret = tpmi_process_info(tpmi_info, pfs);
++			if (ret)
++				return ret;
++		}
+ 
+ 		if (pfs->pfs_header.tpmi_id == TPMI_CONTROL_ID)
+ 			tpmi_set_control_base(auxdev, tpmi_info, pfs);
diff --git a/queue-6.9/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch b/queue-6.9/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
new file mode 100644
index 00000000000..2930a70e33c
--- /dev/null
+++ b/queue-6.9/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
@@ -0,0 +1,50 @@
+From db643cb7ebe524d17b4b13583dda03485d4a1bc0 Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Mon, 15 Apr 2024 14:52:10 -0700
+Subject: platform/x86/intel-uncore-freq: Don't present root domain on error
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit db643cb7ebe524d17b4b13583dda03485d4a1bc0 upstream.
+
+If none of the clusters are added because of some error, fail to load
+driver without presenting root domain. In this case root domain will
+present invalid data.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Fixes: 01c10f88c9b7 ("platform/x86/intel-uncore-freq: tpmi: Provide cluster level control")
+Cc: <stable@vger.kernel.org> # 6.5+
+Link: https://lore.kernel.org/r/20240415215210.2824868-1-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+@@ -240,6 +240,7 @@ static int uncore_probe(struct auxiliary
+ 	bool read_blocked = 0, write_blocked = 0;
+ 	struct intel_tpmi_plat_info *plat_info;
+ 	struct tpmi_uncore_struct *tpmi_uncore;
++	bool uncore_sysfs_added = false;
+ 	int ret, i, pkg = 0;
+ 	int num_resources;
+ 
+@@ -384,9 +385,15 @@ static int uncore_probe(struct auxiliary
+ 			}
+ 			/* Point to next cluster offset */
+ 			cluster_offset >>= UNCORE_MAX_CLUSTER_PER_DOMAIN;
++			uncore_sysfs_added = true;
+ 		}
+ 	}
+ 
++	if (!uncore_sysfs_added) {
++		ret = -ENODEV;
++		goto remove_clusters;
++	}
++
+ 	auxiliary_set_drvdata(auxdev, tpmi_uncore);
+ 
+ 	tpmi_uncore->root_cluster.root_domain = true;
diff --git a/queue-6.9/series b/queue-6.9/series
index 1b90c113cb8..60916d71a7c 100644
--- a/queue-6.9/series
+++ b/queue-6.9/series
@@ -363,3 +363,12 @@ hwmon-shtc1-fix-property-misspelling.patch
 riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch
 alsa-seq-ump-fix-swapped-song-position-pointer-data.patch
 revert-drm-make-drivers-depends-on-drm_dw_hdmi.patch
+x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
+efi-libstub-only-free-priv.runtime_map-when-allocated.patch
+x86-topology-handle-bogus-acpi-tables-correctly.patch
+x86-pci-skip-early-e820-check-for-ecam-region.patch
+kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
+x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch
+genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
+platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
+platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
diff --git a/queue-6.9/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch b/queue-6.9/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
new file mode 100644
index 00000000000..7dafff96bb6
--- /dev/null
+++ b/queue-6.9/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
@@ -0,0 +1,107 @@
+From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 16 May 2024 11:05:42 +0200
+Subject: x86/efistub: Omit physical KASLR when memory reservations exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream.
+
+The legacy decompressor has elaborate logic to ensure that the
+randomized physical placement of the decompressed kernel image does not
+conflict with any memory reservations, including ones specified on the
+command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are
+taken into account by the kernel proper at a later stage.
+
+When booting in EFI mode, it is the firmware's job to ensure that the
+chosen range does not conflict with any memory reservations that it
+knows about, and this is trivially achieved by using the firmware's
+memory allocation APIs.
+
+That leaves reservations specified on the command line, though, which
+the firmware knows nothing about, as these regions have no other special
+significance to the platform. Since commit
+
+  a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+
+these reservations are not taken into account when randomizing the
+physical placement, which may result in conflicts where the memory
+cannot be reserved by the kernel proper because its own executable image
+resides there.
+
+To avoid having to duplicate or reuse the existing complicated logic,
+disable physical KASLR entirely when such overrides are specified. These
+are mostly diagnostic tools or niche features, and physical KASLR (as
+opposed to virtual KASLR, which is much more important as it affects the
+memory addresses observed by code executing in the kernel) is something
+we can live without.
+
+Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com
+Reported-by: Ben Chaney <bchaney@akamai.com>
+Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+Cc:  <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/x86-stub.c |   28 ++++++++++++++++++++++++++--
+ 1 file changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -776,6 +776,26 @@ static void error(char *str)
+ 	efi_warn("Decompression failed: %s\n", str);
+ }
+ 
++static const char *cmdline_memmap_override;
++
++static efi_status_t parse_options(const char *cmdline)
++{
++	static const char opts[][14] = {
++		"mem=", "memmap=", "efi_fake_mem=", "hugepages="
++	};
++
++	for (int i = 0; i < ARRAY_SIZE(opts); i++) {
++		const char *p = strstr(cmdline, opts[i]);
++
++		if (p == cmdline || (p > cmdline && isspace(p[-1]))) {
++			cmdline_memmap_override = opts[i];
++			break;
++		}
++	}
++
++	return efi_parse_options(cmdline);
++}
++
+ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+ {
+ 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+@@ -807,6 +827,10 @@ static efi_status_t efi_decompress_kerne
+ 		    !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
+ 			efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
+ 			seed[0] = 0;
++		} else if (cmdline_memmap_override) {
++			efi_info("%s detected on the kernel command line - disabling physical KASLR\n",
++				 cmdline_memmap_override);
++			seed[0] = 0;
+ 		}
+ 
+ 		boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
+@@ -883,7 +907,7 @@ void __noreturn efi_stub_entry(efi_handl
+ 	}
+ 
+ #ifdef CONFIG_CMDLINE_BOOL
+-	status = efi_parse_options(CONFIG_CMDLINE);
++	status = parse_options(CONFIG_CMDLINE);
+ 	if (status != EFI_SUCCESS) {
+ 		efi_err("Failed to parse options\n");
+ 		goto fail;
+@@ -892,7 +916,7 @@ void __noreturn efi_stub_entry(efi_handl
+ 	if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ 		unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ 					       ((u64)boot_params->ext_cmd_line_ptr << 32));
+-		status = efi_parse_options((char *)cmdline_paddr);
++		status = parse_options((char *)cmdline_paddr);
+ 		if (status != EFI_SUCCESS) {
+ 			efi_err("Failed to parse options\n");
+ 			goto fail;
diff --git a/queue-6.9/x86-pci-skip-early-e820-check-for-ecam-region.patch b/queue-6.9/x86-pci-skip-early-e820-check-for-ecam-region.patch
new file mode 100644
index 00000000000..a2f70380296
--- /dev/null
+++ b/queue-6.9/x86-pci-skip-early-e820-check-for-ecam-region.patch
@@ -0,0 +1,131 @@
+From 199f968f1484a14024d0d467211ffc2faf193eb4 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Wed, 17 Apr 2024 15:40:12 -0500
+Subject: x86/pci: Skip early E820 check for ECAM region
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 199f968f1484a14024d0d467211ffc2faf193eb4 upstream.
+
+Arul, Mateusz, Imcarneiro91, and Aman reported a regression caused by
+07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map").  On the
+Lenovo Legion 9i laptop, that commit removes the ECAM area from E820, which
+means the early E820 validation fails, which means we don't enable ECAM in
+the "early MCFG" path.
+
+The static MCFG table describes ECAM without depending on the ACPI
+interpreter.  Many Legion 9i ACPI methods rely on that, so they fail when
+PCI config access isn't available, resulting in the embedded controller,
+PS/2, audio, trackpad, and battery devices not being detected.  The _OSC
+method also fails, so Linux can't take control of the PCIe hotplug, PME,
+and AER features:
+
+  # pci_mmcfg_early_init()
+
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+  PCI: not using ECAM ([mem 0xc0000000-0xce0fffff] not reserved)
+
+  ACPI Error: AE_ERROR, Returned by Handler for [PCI_Config] (20230628/evregion-300)
+  ACPI: Interpreter enabled
+  ACPI: Ignoring error and continuing table load
+  ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+  ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+  ACPI: Skipping parse of AML opcode: OpcodeName unavailable (0x0010)
+  ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+  ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+  ...
+  ACPI Error: Aborting method \_SB.PC00._OSC due to previous error (AE_NOT_FOUND) (20230628/psparse-529)
+  acpi PNP0A08:00: _OSC: platform retains control of PCIe features (AE_NOT_FOUND)
+
+  # pci_mmcfg_late_init()
+
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+  PCI: [Firmware Info]: ECAM [mem 0xc0000000-0xce0fffff] not reserved in ACPI motherboard resources
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] is EfiMemoryMappedIO; assuming valid
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] reserved to work around lack of ACPI motherboard _CRS
+
+Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be reserved by a PNP0C02
+resource, but there's no requirement to mention it in E820, so we shouldn't
+look at E820 to validate the ECAM space described by MCFG.
+
+In 2006, 946f2ee5c731 ("[PATCH] i386/x86-64: Check that MCFG points to an
+e820 reserved area") added a sanity check of E820 to work around buggy MCFG
+tables, but that over-aggressive validation causes failures like this one.
+
+Keep the E820 validation check for machines older than 2016, an arbitrary
+ten years after 946f2ee5c731, so machines that depend on it don't break.
+
+Skip the early E820 check for 2016 and newer BIOSes since there's no
+requirement to describe ECAM in E820.
+
+Link: https://lore.kernel.org/r/20240417204012.215030-2-helgaas@kernel.org
+Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map")
+Reported-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218444
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Tested-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/mmconfig-shared.c |   40 +++++++++++++++++++++++++++++-----------
+ 1 file changed, 29 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/pci/mmconfig-shared.c
++++ b/arch/x86/pci/mmconfig-shared.c
+@@ -518,7 +518,34 @@ static bool __ref pci_mmcfg_reserved(str
+ {
+ 	struct resource *conflict;
+ 
+-	if (!early && !acpi_disabled) {
++	if (early) {
++
++		/*
++		 * Don't try to do this check unless configuration type 1
++		 * is available.  How about type 2?
++		 */
++
++		/*
++		 * 946f2ee5c731 ("Check that MCFG points to an e820
++		 * reserved area") added this E820 check in 2006 to work
++		 * around BIOS defects.
++		 *
++		 * Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be
++		 * reserved by a PNP0C02 resource, but it need not be
++		 * mentioned in E820.  Before the ACPI interpreter is
++		 * available, we can't check for PNP0C02 resources, so
++		 * there's no reliable way to verify the region in this
++		 * early check.  Keep it only for the old machines that
++		 * motivated 946f2ee5c731.
++		 */
++		if (dmi_get_bios_year() < 2016 && raw_pci_ops)
++			return is_mmconf_reserved(e820__mapped_all, cfg, dev,
++						  "E820 entry");
++
++		return true;
++	}
++
++	if (!acpi_disabled) {
+ 		if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+ 				       "ACPI motherboard resource"))
+ 			return true;
+@@ -551,16 +578,7 @@ static bool __ref pci_mmcfg_reserved(str
+ 	 * For MCFG information constructed from hotpluggable host bridge's
+ 	 * _CBA method, just assume it's reserved.
+ 	 */
+-	if (pci_mmcfg_running_state)
+-		return true;
+-
+-	/* Don't try to do this check unless configuration
+-	   type 1 is available. how about type 2 ?*/
+-	if (raw_pci_ops)
+-		return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+-					  "E820 entry");
+-
+-	return false;
++	return pci_mmcfg_running_state;
+ }
+ 
+ static void __init pci_mmcfg_reject_broken(int early)
diff --git a/queue-6.9/x86-topology-handle-bogus-acpi-tables-correctly.patch b/queue-6.9/x86-topology-handle-bogus-acpi-tables-correctly.patch
new file mode 100644
index 00000000000..9265875cb64
--- /dev/null
+++ b/queue-6.9/x86-topology-handle-bogus-acpi-tables-correctly.patch
@@ -0,0 +1,137 @@
+From 9d22c96316ac59ed38e80920c698fed38717b91b Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 May 2024 16:40:36 +0200
+Subject: x86/topology: Handle bogus ACPI tables correctly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 9d22c96316ac59ed38e80920c698fed38717b91b upstream.
+
+The ACPI specification clearly states how the processors should be
+enumerated in the MADT:
+
+ "To ensure that the boot processor is supported post initialization,
+  two guidelines should be followed. The first is that OSPM should
+  initialize processors in the order that they appear in the MADT. The
+  second is that platform firmware should list the boot processor as the
+  first processor entry in the MADT.
+  ...
+  Failure of OSPM implementations and platform firmware to abide by
+  these guidelines can result in both unpredictable and non optimal
+  platform operation."
+
+The kernel relies on that ordering to detect the real BSP on crash kernels
+which is important to avoid sending an INIT IPI to it as that would cause a
+full machine reset.
+
+On a Dell XPS 16 9640 the BIOS ignores this rule and enumerates the CPUs in
+the wrong order. As a consequence the kernel falsely detects a crash kernel
+and disables the corresponding CPU.
+
+Prevent this by checking the IA32_APICBASE MSR for the BSP bit on the boot
+CPU. If that bit is set, then the MADT based BSP detection can be safely
+ignored. If the kernel detects a mismatch between the BSP bit and the first
+enumerated MADT entry then emit a firmware bug message.
+
+This obviously also has to be taken into account when the boot APIC ID and
+the first enumerated APIC ID match. If the boot CPU does not have the BSP
+bit set in the APICBASE MSR then there is no way for the boot CPU to
+determine which of the CPUs is the real BSP. Sending an INIT to the real
+BSP would reset the machine so the only sane way to deal with that is to
+limit the number of CPUs to one and emit a corresponding warning message.
+
+Fixes: 5c5682b9f87a ("x86/cpu: Detect real BSP on crash kernels")
+Reported-by: Carsten Tolkmit <ctolkmit@ennit.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Carsten Tolkmit <ctolkmit@ennit.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87le48jycb.ffs@tglx
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218837
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/topology.c | 55 +++++++++++++++++++++++++++++++---
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index d17c9b71eb4a..621a151ccf7d 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -128,6 +128,9 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
+ 
+ static __init bool check_for_real_bsp(u32 apic_id)
+ {
++	bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
++	u64 msr;
++
+ 	/*
+ 	 * There is no real good way to detect whether this a kdump()
+ 	 * kernel, but except on the Voyager SMP monstrosity which is not
+@@ -144,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id)
+ 	if (topo_info.real_bsp_apic_id != BAD_APICID)
+ 		return false;
+ 
+-	if (apic_id == topo_info.boot_cpu_apic_id) {
+-		topo_info.real_bsp_apic_id = apic_id;
+-		return false;
++	/*
++	 * Check whether the enumeration order is broken by evaluating the
++	 * BSP bit in the APICBASE MSR. If the CPU does not have the
++	 * APICBASE MSR then the BSP detection is not possible and the
++	 * kernel must rely on the firmware enumeration order.
++	 */
++	if (has_apic_base) {
++		rdmsrl(MSR_IA32_APICBASE, msr);
++		is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
+ 	}
+ 
+-	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
++	if (apic_id == topo_info.boot_cpu_apic_id) {
++		/*
++		 * If the boot CPU has the APIC BSP bit set then the
++		 * firmware enumeration is agreeing. If the CPU does not
++		 * have the APICBASE MSR then the only choice is to trust
++		 * the enumeration order.
++		 */
++		if (is_bsp || !has_apic_base) {
++			topo_info.real_bsp_apic_id = apic_id;
++			return false;
++		}
++		/*
++		 * If the boot APIC is enumerated first, but the APICBASE
++		 * MSR does not have the BSP bit set, then there is no way
++		 * to discover the real BSP here. Assume a crash kernel and
++		 * limit the number of CPUs to 1 as an INIT to the real BSP
++		 * would reset the machine.
++		 */
++		pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
++		pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
++		set_nr_cpu_ids(1);
++		goto fwbug;
++	}
++
++	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
+ 		topo_info.boot_cpu_apic_id, apic_id);
++
++	if (is_bsp) {
++		/*
++		 * The boot CPU has the APIC BSP bit set. Use it and complain
++		 * about the broken firmware enumeration.
++		 */
++		topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
++		goto fwbug;
++	}
++
+ 	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
+ 
+ 	topo_info.real_bsp_apic_id = apic_id;
+ 	return true;
++
++fwbug:
++	pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
++	return false;
+ }
+ 
+ static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
+-- 
+2.45.2
+
diff --git a/queue-6.9/x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch b/queue-6.9/x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch
new file mode 100644
index 00000000000..19a9a92a59b
--- /dev/null
+++ b/queue-6.9/x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch
@@ -0,0 +1,109 @@
+From 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 30 May 2024 17:29:18 +0200
+Subject: x86/topology/intel: Unlock CPUID before evaluating anything
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 upstream.
+
+Intel CPUs have an MSR bit to limit CPUID enumeration to leaf two. If
+this bit is set by the BIOS then CPUID evaluation including topology
+enumeration does not work correctly as the evaluation code does not try
+to analyze any leaf greater than two.
+
+This went unnoticed before because the original topology code just
+repeated evaluation several times and managed to overwrite the initial
+limited information with the correct one later. The new evaluation code
+does it once and therefore ends up with the limited and wrong
+information.
+
+Cure this by unlocking CPUID right before evaluating anything which
+depends on the maximum CPUID leaf being greater than two, instead of
+re-reading the affected information after the unlock.
+
+Fixes: 22d63660c35e ("x86/cpu: Use common topology code for Intel")
+Reported-by: Peter Schneider <pschneider1968@googlemail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Peter Schneider <pschneider1968@googlemail.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/fd3f73dc-a86f-4bcf-9c60-43556a21eb42@googlemail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    3 ++-
+ arch/x86/kernel/cpu/cpu.h    |    2 ++
+ arch/x86/kernel/cpu/intel.c  |   25 ++++++++++++++++---------
+ 3 files changed, 20 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(st
+ 	if (have_cpuid_p()) {
+ 		cpu_detect(c);
+ 		get_cpu_vendor(c);
++		intel_unlock_cpuid_leafs(c);
+ 		get_cpu_cap(c);
+ 		setup_force_cpu_cap(X86_FEATURE_CPUID);
+ 		get_cpu_address_sizes(c);
+@@ -1748,7 +1749,7 @@ static void generic_identify(struct cpui
+ 	cpu_detect(c);
+ 
+ 	get_cpu_vendor(c);
+-
++	intel_unlock_cpuid_leafs(c);
+ 	get_cpu_cap(c);
+ 
+ 	get_cpu_address_sizes(c);
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_sta
+ 
+ extern void __init tsx_init(void);
+ void tsx_ap_init(void);
++void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
+ #else
+ static inline void tsx_init(void) { }
+ static inline void tsx_ap_init(void) { }
++static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
+ 
+ extern void init_spectral_chicken(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -268,19 +268,26 @@ detect_keyid_bits:
+ 	c->x86_phys_bits -= keyid_bits;
+ }
+ 
++void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
++{
++	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
++		return;
++
++	if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
++		return;
++
++	/*
++	 * The BIOS can have limited CPUID to leaf 2, which breaks feature
++	 * enumeration. Unlock it and update the maximum leaf info.
++	 */
++	if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
++		c->cpuid_level = cpuid_eax(0);
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ 	u64 misc_enable;
+ 
+-	/* Unmask CPUID levels if masked: */
+-	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+-		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
+-				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
+-			c->cpuid_level = cpuid_eax(0);
+-			get_cpu_cap(c);
+-		}
+-	}
+-
+ 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
+ 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);