--- /dev/null
+From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001
+From: Hagar Hemdan <hagarhem@amazon.com>
+Date: Tue, 23 Apr 2024 13:59:26 +0000
+Subject: efi: libstub: only free priv.runtime_map when allocated
+
+From: Hagar Hemdan <hagarhem@amazon.com>
+
+commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream.
+
+priv.runtime_map is only allocated when efi_novamap is not set.
+Otherwise, it is an uninitialized value. In the error path, it is freed
+unconditionally. Avoid passing an uninitialized value to free_pool.
+Free priv.runtime_map only when it was allocated.
+
+This bug was discovered and resolved using Coverity Static Analysis
+Security Testing (SAST) by Synopsys, Inc.
+
+Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/fdt.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -335,8 +335,8 @@ fail_free_new_fdt:
+
+ fail:
+ efi_free(fdt_size, fdt_addr);
+-
+- efi_bs_call(free_pool, priv.runtime_map);
++ if (!efi_novamap)
++ efi_bs_call(free_pool, priv.runtime_map);
+
+ return EFI_LOAD_ERROR;
+ }
--- /dev/null
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of
+interrupt affinity reconfiguration via procfs. Instead, the change is
+deferred until the next instance of the interrupt being triggered on the
+original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the
+interrupt is currently or used to be affine to the outgoing CPU.
+
+Additionally, reclaim the vector in __vector_schedule_cleanup() as well,
+following a warning message, although theoretically it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c | 9 ++++++---
+ kernel/irq/cpuhotplug.c | 16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -1036,7 +1036,8 @@ static void __vector_schedule_cleanup(st
+ add_timer_on(&cl->timer, cpu);
+ }
+ } else {
+- apicd->prev_vector = 0;
++ pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++ free_moved_vector(apicd);
+ }
+ raw_spin_unlock(&vector_lock);
+ }
+@@ -1073,6 +1074,7 @@ void irq_complete_move(struct irq_cfg *c
+ */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++ unsigned int cpu = smp_processor_id();
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
+ unsigned int vector;
+@@ -1097,10 +1099,11 @@ void irq_force_complete_move(struct irq_
+ goto unlock;
+
+ /*
+- * If prev_vector is empty, no action required.
++ * If prev_vector is empty or the descriptor is neither currently
++ * nor previously on the outgoing CPU no action required.
+ */
+ vector = apicd->prev_vector;
+- if (!vector)
++ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ goto unlock;
+
+ /*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+ }
+
+ /*
++ * Complete an eventually pending irq move cleanup. If this
++ * interrupt was moved in hard irq context, then the vectors need
++ * to be cleaned up. It can't wait until this interrupt actually
++ * happens and this CPU was involved.
++ */
++ irq_force_complete_move(desc);
++
++ /*
+ * No move required, if:
+ * - Interrupt is per cpu
+ * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+ }
+
+ /*
+- * Complete an eventually pending irq move cleanup. If this
+- * interrupt was moved in hard irq context, then the vectors need
+- * to be cleaned up. It can't wait until this interrupt actually
+- * happens and this CPU was involved.
+- */
+- irq_force_complete_move(desc);
+-
+- /*
+ * If there is a setaffinity pending, then try to reuse the pending
+ * mask, so the last change of the affinity does not get lost. If
+ * there is no move pending or the pending mask does not contain
--- /dev/null
+From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Wed, 13 Mar 2024 13:58:42 +0100
+Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID
+
+From: Gerd Hoffmann <kraxel@redhat.com>
+
+commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream.
+
+Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16])
+to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths
+to userspace via KVM_GET_SUPPORTED_CPUID.
+
+Per AMD, GuestPhysBits is intended for software use, and physical CPUs do
+not set that field. I.e. GuestPhysBits will be non-zero if and only if
+KVM is running as a nested hypervisor, and in that case, GuestPhysBits is
+NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with
+TDP enabled.
+
+E.g. KVM will soon use GuestPhysBits to communicate the CPU's maximum
+*addressable* guest physical address, which would result in KVM under-
+reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52,
+but without 5-level paging.
+
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com
+[sean: rewrite changelog with --verbose, Cc stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -1232,9 +1232,8 @@ static inline int __do_cpuid_func(struct
+ entry->eax = entry->ebx = entry->ecx = 0;
+ break;
+ case 0x80000008: {
+- unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+- unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+- unsigned phys_as = entry->eax & 0xff;
++ unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
++ unsigned int phys_as;
+
+ /*
+ * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+@@ -1242,16 +1241,16 @@ static inline int __do_cpuid_func(struct
+ * reductions in MAXPHYADDR for memory encryption affect shadow
+ * paging, too.
+ *
+- * If TDP is enabled but an explicit guest MAXPHYADDR is not
+- * provided, use the raw bare metal MAXPHYADDR as reductions to
+- * the HPAs do not affect GPAs.
++ * If TDP is enabled, use the raw bare metal MAXPHYADDR as
++ * reductions to the HPAs do not affect GPAs.
+ */
+- if (!tdp_enabled)
+- g_phys_as = boot_cpu_data.x86_phys_bits;
+- else if (!g_phys_as)
+- g_phys_as = phys_as;
++ if (!tdp_enabled) {
++ phys_as = boot_cpu_data.x86_phys_bits;
++ } else {
++ phys_as = entry->eax & 0xff;
++ }
+
+- entry->eax = g_phys_as | (virt_as << 8);
++ entry->eax = phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+ entry->edx = 0;
+ cpuid_entry_override(entry, CPUID_8000_0008_EBX);
--- /dev/null
+From 2920141fc149f71bad22361946417bc43783ed7f Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Tue, 23 Apr 2024 13:46:10 -0700
+Subject: platform/x86/intel/tpmi: Handle error from tpmi_process_info()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 2920141fc149f71bad22361946417bc43783ed7f upstream.
+
+When tpmi_process_info() returns an error, fail to load the driver.
+This can happen if the call to ioremap() fails.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.3+
+Link: https://lore.kernel.org/r/20240423204619.3946901-2-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/tpmi.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/platform/x86/intel/tpmi.c
++++ b/drivers/platform/x86/intel/tpmi.c
+@@ -763,8 +763,11 @@ static int intel_vsec_tpmi_init(struct a
+ * when actual device nodes created outside this
+ * loop via tpmi_create_devices().
+ */
+- if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID)
+- tpmi_process_info(tpmi_info, pfs);
++ if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID) {
++ ret = tpmi_process_info(tpmi_info, pfs);
++ if (ret)
++ return ret;
++ }
+
+ if (pfs->pfs_header.tpmi_id == TPMI_CONTROL_ID)
+ tpmi_set_control_base(auxdev, tpmi_info, pfs);
--- /dev/null
+From db643cb7ebe524d17b4b13583dda03485d4a1bc0 Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Mon, 15 Apr 2024 14:52:10 -0700
+Subject: platform/x86/intel-uncore-freq: Don't present root domain on error
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit db643cb7ebe524d17b4b13583dda03485d4a1bc0 upstream.
+
+If none of the clusters are added because of some error, fail to load
+the driver instead of presenting the root domain, as the root domain
+would otherwise present invalid data.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Fixes: 01c10f88c9b7 ("platform/x86/intel-uncore-freq: tpmi: Provide cluster level control")
+Cc: <stable@vger.kernel.org> # 6.5+
+Link: https://lore.kernel.org/r/20240415215210.2824868-1-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+@@ -240,6 +240,7 @@ static int uncore_probe(struct auxiliary
+ bool read_blocked = 0, write_blocked = 0;
+ struct intel_tpmi_plat_info *plat_info;
+ struct tpmi_uncore_struct *tpmi_uncore;
++ bool uncore_sysfs_added = false;
+ int ret, i, pkg = 0;
+ int num_resources;
+
+@@ -384,9 +385,15 @@ static int uncore_probe(struct auxiliary
+ }
+ /* Point to next cluster offset */
+ cluster_offset >>= UNCORE_MAX_CLUSTER_PER_DOMAIN;
++ uncore_sysfs_added = true;
+ }
+ }
+
++ if (!uncore_sysfs_added) {
++ ret = -ENODEV;
++ goto remove_clusters;
++ }
++
+ auxiliary_set_drvdata(auxdev, tpmi_uncore);
+
+ tpmi_uncore->root_cluster.root_domain = true;
riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch
alsa-seq-ump-fix-swapped-song-position-pointer-data.patch
revert-drm-make-drivers-depends-on-drm_dw_hdmi.patch
+x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
+efi-libstub-only-free-priv.runtime_map-when-allocated.patch
+x86-topology-handle-bogus-acpi-tables-correctly.patch
+x86-pci-skip-early-e820-check-for-ecam-region.patch
+kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
+x86-topology-intel-unlock-cpuid-before-evaluating-anything.patch
+genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
+platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
+platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
--- /dev/null
+From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 16 May 2024 11:05:42 +0200
+Subject: x86/efistub: Omit physical KASLR when memory reservations exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream.
+
+The legacy decompressor has elaborate logic to ensure that the
+randomized physical placement of the decompressed kernel image does not
+conflict with any memory reservations, including ones specified on the
+command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are
+taken into account by the kernel proper at a later stage.
+
+When booting in EFI mode, it is the firmware's job to ensure that the
+chosen range does not conflict with any memory reservations that it
+knows about, and this is trivially achieved by using the firmware's
+memory allocation APIs.
+
+That leaves reservations specified on the command line, though, which
+the firmware knows nothing about, as these regions have no other special
+significance to the platform. Since commit
+
+ a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+
+these reservations are not taken into account when randomizing the
+physical placement, which may result in conflicts where the memory
+cannot be reserved by the kernel proper because its own executable image
+resides there.
+
+To avoid having to duplicate or reuse the existing complicated logic,
+disable physical KASLR entirely when such overrides are specified. These
+are mostly diagnostic tools or niche features, and physical KASLR (as
+opposed to virtual KASLR, which is much more important as it affects the
+memory addresses observed by code executing in the kernel) is something
+we can live without.
+
+Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com
+Reported-by: Ben Chaney <bchaney@akamai.com>
+Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+Cc: <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/x86-stub.c | 28 ++++++++++++++++++++++++++--
+ 1 file changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -776,6 +776,26 @@ static void error(char *str)
+ efi_warn("Decompression failed: %s\n", str);
+ }
+
++static const char *cmdline_memmap_override;
++
++static efi_status_t parse_options(const char *cmdline)
++{
++ static const char opts[][14] = {
++ "mem=", "memmap=", "efi_fake_mem=", "hugepages="
++ };
++
++ for (int i = 0; i < ARRAY_SIZE(opts); i++) {
++ const char *p = strstr(cmdline, opts[i]);
++
++ if (p == cmdline || (p > cmdline && isspace(p[-1]))) {
++ cmdline_memmap_override = opts[i];
++ break;
++ }
++ }
++
++ return efi_parse_options(cmdline);
++}
++
+ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+ {
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+@@ -807,6 +827,10 @@ static efi_status_t efi_decompress_kerne
+ !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
+ efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
+ seed[0] = 0;
++ } else if (cmdline_memmap_override) {
++ efi_info("%s detected on the kernel command line - disabling physical KASLR\n",
++ cmdline_memmap_override);
++ seed[0] = 0;
+ }
+
+ boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
+@@ -883,7 +907,7 @@ void __noreturn efi_stub_entry(efi_handl
+ }
+
+ #ifdef CONFIG_CMDLINE_BOOL
+- status = efi_parse_options(CONFIG_CMDLINE);
++ status = parse_options(CONFIG_CMDLINE);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail;
+@@ -892,7 +916,7 @@ void __noreturn efi_stub_entry(efi_handl
+ if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ ((u64)boot_params->ext_cmd_line_ptr << 32));
+- status = efi_parse_options((char *)cmdline_paddr);
++ status = parse_options((char *)cmdline_paddr);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail;
--- /dev/null
+From 199f968f1484a14024d0d467211ffc2faf193eb4 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Wed, 17 Apr 2024 15:40:12 -0500
+Subject: x86/pci: Skip early E820 check for ECAM region
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 199f968f1484a14024d0d467211ffc2faf193eb4 upstream.
+
+Arul, Mateusz, Imcarneiro91, and Aman reported a regression caused by
+07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map"). On the
+Lenovo Legion 9i laptop, that commit removes the ECAM area from E820, which
+means the early E820 validation fails, which means we don't enable ECAM in
+the "early MCFG" path.
+
+The static MCFG table describes ECAM without depending on the ACPI
+interpreter. Many Legion 9i ACPI methods rely on that, so they fail when
+PCI config access isn't available, resulting in the embedded controller,
+PS/2, audio, trackpad, and battery devices not being detected. The _OSC
+method also fails, so Linux can't take control of the PCIe hotplug, PME,
+and AER features:
+
+ # pci_mmcfg_early_init()
+
+ PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+ PCI: not using ECAM ([mem 0xc0000000-0xce0fffff] not reserved)
+
+ ACPI Error: AE_ERROR, Returned by Handler for [PCI_Config] (20230628/evregion-300)
+ ACPI: Interpreter enabled
+ ACPI: Ignoring error and continuing table load
+ ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+ ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+ ACPI: Skipping parse of AML opcode: OpcodeName unavailable (0x0010)
+ ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+ ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+ ...
+ ACPI Error: Aborting method \_SB.PC00._OSC due to previous error (AE_NOT_FOUND) (20230628/psparse-529)
+ acpi PNP0A08:00: _OSC: platform retains control of PCIe features (AE_NOT_FOUND)
+
+ # pci_mmcfg_late_init()
+
+ PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+ PCI: [Firmware Info]: ECAM [mem 0xc0000000-0xce0fffff] not reserved in ACPI motherboard resources
+ PCI: ECAM [mem 0xc0000000-0xce0fffff] is EfiMemoryMappedIO; assuming valid
+ PCI: ECAM [mem 0xc0000000-0xce0fffff] reserved to work around lack of ACPI motherboard _CRS
+
+Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be reserved by a PNP0C02
+resource, but there's no requirement to mention it in E820, so we shouldn't
+look at E820 to validate the ECAM space described by MCFG.
+
+In 2006, 946f2ee5c731 ("[PATCH] i386/x86-64: Check that MCFG points to an
+e820 reserved area") added a sanity check of E820 to work around buggy MCFG
+tables, but that over-aggressive validation causes failures like this one.
+
+Keep the E820 validation check for machines older than 2016, an arbitrary
+ten years after 946f2ee5c731, so machines that depend on it don't break.
+
+Skip the early E820 check for 2016 and newer BIOSes since there's no
+requirement to describe ECAM in E820.
+
+Link: https://lore.kernel.org/r/20240417204012.215030-2-helgaas@kernel.org
+Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map")
+Reported-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218444
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Tested-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/mmconfig-shared.c | 40 +++++++++++++++++++++++++++++-----------
+ 1 file changed, 29 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/pci/mmconfig-shared.c
++++ b/arch/x86/pci/mmconfig-shared.c
+@@ -518,7 +518,34 @@ static bool __ref pci_mmcfg_reserved(str
+ {
+ struct resource *conflict;
+
+- if (!early && !acpi_disabled) {
++ if (early) {
++
++ /*
++ * Don't try to do this check unless configuration type 1
++ * is available. How about type 2?
++ */
++
++ /*
++ * 946f2ee5c731 ("Check that MCFG points to an e820
++ * reserved area") added this E820 check in 2006 to work
++ * around BIOS defects.
++ *
++ * Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be
++ * reserved by a PNP0C02 resource, but it need not be
++ * mentioned in E820. Before the ACPI interpreter is
++ * available, we can't check for PNP0C02 resources, so
++ * there's no reliable way to verify the region in this
++ * early check. Keep it only for the old machines that
++ * motivated 946f2ee5c731.
++ */
++ if (dmi_get_bios_year() < 2016 && raw_pci_ops)
++ return is_mmconf_reserved(e820__mapped_all, cfg, dev,
++ "E820 entry");
++
++ return true;
++ }
++
++ if (!acpi_disabled) {
+ if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+ "ACPI motherboard resource"))
+ return true;
+@@ -551,16 +578,7 @@ static bool __ref pci_mmcfg_reserved(str
+ * For MCFG information constructed from hotpluggable host bridge's
+ * _CBA method, just assume it's reserved.
+ */
+- if (pci_mmcfg_running_state)
+- return true;
+-
+- /* Don't try to do this check unless configuration
+- type 1 is available. how about type 2 ?*/
+- if (raw_pci_ops)
+- return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+- "E820 entry");
+-
+- return false;
++ return pci_mmcfg_running_state;
+ }
+
+ static void __init pci_mmcfg_reject_broken(int early)
--- /dev/null
+From 9d22c96316ac59ed38e80920c698fed38717b91b Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 May 2024 16:40:36 +0200
+Subject: x86/topology: Handle bogus ACPI tables correctly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 9d22c96316ac59ed38e80920c698fed38717b91b upstream.
+
+The ACPI specification clearly states how the processors should be
+enumerated in the MADT:
+
+ "To ensure that the boot processor is supported post initialization,
+ two guidelines should be followed. The first is that OSPM should
+ initialize processors in the order that they appear in the MADT. The
+ second is that platform firmware should list the boot processor as the
+ first processor entry in the MADT.
+ ...
+ Failure of OSPM implementations and platform firmware to abide by
+ these guidelines can result in both unpredictable and non optimal
+ platform operation."
+
+The kernel relies on that ordering to detect the real BSP on crash kernels
+which is important to avoid sending an INIT IPI to it as that would cause a
+full machine reset.
+
+On a Dell XPS 16 9640 the BIOS ignores this rule and enumerates the CPUs in
+the wrong order. As a consequence the kernel falsely detects a crash kernel
+and disables the corresponding CPU.
+
+Prevent this by checking the IA32_APICBASE MSR for the BSP bit on the boot
+CPU. If that bit is set, then the MADT based BSP detection can be safely
+ignored. If the kernel detects a mismatch between the BSP bit and the first
+enumerated MADT entry then emit a firmware bug message.
+
+This obviously also has to be taken into account when the boot APIC ID and
+the first enumerated APIC ID match. If the boot CPU does not have the BSP
+bit set in the APICBASE MSR then there is no way for the boot CPU to
+determine which of the CPUs is the real BSP. Sending an INIT to the real
+BSP would reset the machine so the only sane way to deal with that is to
+limit the number of CPUs to one and emit a corresponding warning message.
+
+Fixes: 5c5682b9f87a ("x86/cpu: Detect real BSP on crash kernels")
+Reported-by: Carsten Tolkmit <ctolkmit@ennit.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Carsten Tolkmit <ctolkmit@ennit.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/87le48jycb.ffs@tglx
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218837
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/topology.c | 55 +++++++++++++++++++++++++++++++---
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index d17c9b71eb4a..621a151ccf7d 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -128,6 +128,9 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
+
+ static __init bool check_for_real_bsp(u32 apic_id)
+ {
++ bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
++ u64 msr;
++
+ /*
+ * There is no real good way to detect whether this a kdump()
+ * kernel, but except on the Voyager SMP monstrosity which is not
+@@ -144,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id)
+ if (topo_info.real_bsp_apic_id != BAD_APICID)
+ return false;
+
+- if (apic_id == topo_info.boot_cpu_apic_id) {
+- topo_info.real_bsp_apic_id = apic_id;
+- return false;
++ /*
++ * Check whether the enumeration order is broken by evaluating the
++ * BSP bit in the APICBASE MSR. If the CPU does not have the
++ * APICBASE MSR then the BSP detection is not possible and the
++ * kernel must rely on the firmware enumeration order.
++ */
++ if (has_apic_base) {
++ rdmsrl(MSR_IA32_APICBASE, msr);
++ is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
+ }
+
+- pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
++ if (apic_id == topo_info.boot_cpu_apic_id) {
++ /*
++ * If the boot CPU has the APIC BSP bit set then the
++ * firmware enumeration is agreeing. If the CPU does not
++ * have the APICBASE MSR then the only choice is to trust
++ * the enumeration order.
++ */
++ if (is_bsp || !has_apic_base) {
++ topo_info.real_bsp_apic_id = apic_id;
++ return false;
++ }
++ /*
++ * If the boot APIC is enumerated first, but the APICBASE
++ * MSR does not have the BSP bit set, then there is no way
++ * to discover the real BSP here. Assume a crash kernel and
++ * limit the number of CPUs to 1 as an INIT to the real BSP
++ * would reset the machine.
++ */
++ pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
++ pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
++ set_nr_cpu_ids(1);
++ goto fwbug;
++ }
++
++ pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
+ topo_info.boot_cpu_apic_id, apic_id);
++
++ if (is_bsp) {
++ /*
++ * The boot CPU has the APIC BSP bit set. Use it and complain
++ * about the broken firmware enumeration.
++ */
++ topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
++ goto fwbug;
++ }
++
+ pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
+
+ topo_info.real_bsp_apic_id = apic_id;
+ return true;
++
++fwbug:
++ pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
++ return false;
+ }
+
+ static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
+--
+2.45.2
+
--- /dev/null
+From 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 30 May 2024 17:29:18 +0200
+Subject: x86/topology/intel: Unlock CPUID before evaluating anything
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 upstream.
+
+Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If
+this bit is set by the BIOS then CPUID evaluation including topology
+enumeration does not work correctly as the evaluation code does not try
+to analyze any leaf greater than two.
+
+This went unnoticed before because the original topology code just
+repeated evaluation several times and managed to overwrite the initial
+limited information with the correct one later. The new evaluation code
+does it once and therefore ends up with the limited and wrong
+information.
+
+Cure this by unlocking CPUID right before evaluating anything which
+depends on the maximum CPUID leaf being greater than two instead of
+rereading stuff after unlock.
+
+Fixes: 22d63660c35e ("x86/cpu: Use common topology code for Intel")
+Reported-by: Peter Schneider <pschneider1968@googlemail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Peter Schneider <pschneider1968@googlemail.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/fd3f73dc-a86f-4bcf-9c60-43556a21eb42@googlemail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 3 ++-
+ arch/x86/kernel/cpu/cpu.h | 2 ++
+ arch/x86/kernel/cpu/intel.c | 25 ++++++++++++++++---------
+ 3 files changed, 20 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(st
+ if (have_cpuid_p()) {
+ cpu_detect(c);
+ get_cpu_vendor(c);
++ intel_unlock_cpuid_leafs(c);
+ get_cpu_cap(c);
+ setup_force_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
+@@ -1748,7 +1749,7 @@ static void generic_identify(struct cpui
+ cpu_detect(c);
+
+ get_cpu_vendor(c);
+-
++ intel_unlock_cpuid_leafs(c);
+ get_cpu_cap(c);
+
+ get_cpu_address_sizes(c);
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_sta
+
+ extern void __init tsx_init(void);
+ void tsx_ap_init(void);
++void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
+ #else
+ static inline void tsx_init(void) { }
+ static inline void tsx_ap_init(void) { }
++static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
+
+ extern void init_spectral_chicken(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -268,19 +268,26 @@ detect_keyid_bits:
+ c->x86_phys_bits -= keyid_bits;
+ }
+
++void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
++{
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
++ return;
++
++ if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
++ return;
++
++ /*
++ * The BIOS can have limited CPUID to leaf 2, which breaks feature
++ * enumeration. Unlock it and update the maximum leaf info.
++ */
++ if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
++ c->cpuid_level = cpuid_eax(0);
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ u64 misc_enable;
+
+- /* Unmask CPUID levels if masked: */
+- if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+- if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
+- MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
+- c->cpuid_level = cpuid_eax(0);
+- get_cpu_cap(c);
+- }
+- }
+-
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);