From 82ddd16b4147f820df7fa452e94dc26f38454a95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Jun 2024 15:13:39 +0200 Subject: [PATCH] 6.6-stable patches added patches: efi-libstub-only-free-priv.runtime_map-when-allocated.patch genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch x86-pci-skip-early-e820-check-for-ecam-region.patch --- ...arm64-fpsimd-drop-unneeded-busy-flag.patch | 218 --------------- ...serve-restore-kernel-mode-neon-at-co.patch | 251 ------------------ ...free-priv.runtime_map-when-allocated.patch | 39 +++ ...event-vector-leak-during-cpu-offline.patch | 123 +++++++++ ...xphyaddr-as-host.maxphyaddr-in-cpuid.patch | 74 ++++++ ...-handle-error-from-tpmi_process_info.patch | 42 +++ ...q-don-t-present-root-domain-on-error.patch | 50 ++++ queue-6.6/series | 9 +- ...kaslr-when-memory-reservations-exist.patch | 107 ++++++++ ...kip-early-e820-check-for-ecam-region.patch | 131 +++++++++ 10 files changed, 573 insertions(+), 471 deletions(-) delete mode 100644 queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch delete mode 100644 queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch create mode 100644 queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch create mode 100644 queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch create mode 100644 queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch create mode 100644 queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch create mode 100644 queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch create mode 100644 
queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch create mode 100644 queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch diff --git a/queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch b/queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch deleted file mode 100644 index bb2e8c893d9..00000000000 --- a/queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 37f2773a1ef05374538d5e4ed26cbacebe363241 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 8 Dec 2023 12:32:20 +0100 -Subject: arm64: fpsimd: Drop unneeded 'busy' flag - -From: Ard Biesheuvel - -[ Upstream commit 9b19700e623f96222c69ecb2adecb1a3e3664cc0 ] - -Kernel mode NEON will preserve the user mode FPSIMD state by saving it -into the task struct before clobbering the registers. In order to avoid -the need for preserving kernel mode state too, we disallow nested use of -kernel mode NEON, i..e, use in softirq context while the interrupted -task context was using kernel mode NEON too. - -Originally, this policy was implemented using a per-CPU flag which was -exposed via may_use_simd(), requiring the users of the kernel mode NEON -to deal with the possibility that it might return false, and having NEON -and non-NEON code paths. This policy was changed by commit -13150149aa6ded1 ("arm64: fpsimd: run kernel mode NEON with softirqs -disabled"), and now, softirq processing is disabled entirely instead, -and so may_use_simd() can never fail when called from task or softirq -context. - -This means we can drop the fpsimd_context_busy flag entirely, and -instead, ensure that we disable softirq processing in places where we -formerly relied on the flag for preventing races in the FPSIMD preserve -routines. 
- -Signed-off-by: Ard Biesheuvel -Reviewed-by: Mark Brown -Tested-by: Geert Uytterhoeven -Link: https://lore.kernel.org/r/20231208113218.3001940-7-ardb@google.com -[will: Folded in fix from CAMj1kXFhzbJRyWHELCivQW1yJaF=p07LLtbuyXYX3G1WtsdyQg@mail.gmail.com] -Signed-off-by: Will Deacon -Stable-dep-of: b8995a184170 ("Revert "arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD"") -Signed-off-by: Sasha Levin ---- - arch/arm64/include/asm/simd.h | 11 +------ - arch/arm64/kernel/fpsimd.c | 55 +++++++++-------------------------- - 2 files changed, 15 insertions(+), 51 deletions(-) - -diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h -index 6a75d7ecdcaa2..8e86c9e70e483 100644 ---- a/arch/arm64/include/asm/simd.h -+++ b/arch/arm64/include/asm/simd.h -@@ -12,8 +12,6 @@ - #include - #include - --DECLARE_PER_CPU(bool, fpsimd_context_busy); -- - #ifdef CONFIG_KERNEL_MODE_NEON - - /* -@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void) - /* - * We must make sure that the SVE has been initialized properly - * before using the SIMD in kernel. -- * fpsimd_context_busy is only set while preemption is disabled, -- * and is clear whenever preemption is enabled. Since -- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy -- * cannot change under our feet -- if it's set we cannot be -- * migrated, and if it's clear we cannot be migrated to a CPU -- * where it is set. - */ - return !WARN_ON(!system_capabilities_finalized()) && - system_supports_fpsimd() && -- !in_hardirq() && !irqs_disabled() && !in_nmi() && -- !this_cpu_read(fpsimd_context_busy); -+ !in_hardirq() && !irqs_disabled() && !in_nmi(); - } - - #else /* ! CONFIG_KERNEL_MODE_NEON */ -diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c -index 5cdfcc9e3e54b..b805bdab284c4 100644 ---- a/arch/arm64/kernel/fpsimd.c -+++ b/arch/arm64/kernel/fpsimd.c -@@ -85,13 +85,13 @@ - * softirq kicks in. 
Upon vcpu_put(), KVM will save the vcpu FP state and - * flag the register state as invalid. - * -- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may -- * save the task's FPSIMD context back to task_struct from softirq context. -- * To prevent this from racing with the manipulation of the task's FPSIMD state -- * from task context and thereby corrupting the state, it is necessary to -- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE -- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to -- * run but prevent them to use FPSIMD. -+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be -+ * called from softirq context, which will save the task's FPSIMD context back -+ * to task_struct. To prevent this from racing with the manipulation of the -+ * task's FPSIMD state from task context and thereby corrupting the state, it -+ * is necessary to protect any manipulation of a task's fpsimd_state or -+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend -+ * softirq servicing entirely until put_cpu_fpsimd_context() is called. - * - * For a certain task, the sequence may look something like this: - * - the task gets scheduled in; if both the task's fpsimd_cpu field -@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { } - - #endif - --DEFINE_PER_CPU(bool, fpsimd_context_busy); --EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); -- - static void fpsimd_bind_task_to_cpu(void); - --static void __get_cpu_fpsimd_context(void) --{ -- bool busy = __this_cpu_xchg(fpsimd_context_busy, true); -- -- WARN_ON(busy); --} -- - /* - * Claim ownership of the CPU FPSIMD context for use by the calling context. - * - * The caller may freely manipulate the FPSIMD context metadata until - * put_cpu_fpsimd_context() is called. - * -- * The double-underscore version must only be called if you know the task -- * can't be preempted. 
-- * - * On RT kernels local_bh_disable() is not sufficient because it only - * serializes soft interrupt related sections via a local lock, but stays - * preemptible. Disabling preemption is the right choice here as bottom -@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void) - local_bh_disable(); - else - preempt_disable(); -- __get_cpu_fpsimd_context(); --} -- --static void __put_cpu_fpsimd_context(void) --{ -- bool busy = __this_cpu_xchg(fpsimd_context_busy, false); -- -- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ - } - - /* -@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void) - */ - static void put_cpu_fpsimd_context(void) - { -- __put_cpu_fpsimd_context(); - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_enable(); - else - preempt_enable(); - } - --static bool have_cpu_fpsimd_context(void) --{ -- return !preemptible() && __this_cpu_read(fpsimd_context_busy); --} -- - unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) - { - return task->thread.vl[type]; -@@ -383,7 +356,7 @@ static void task_fpsimd_load(void) - bool restore_ffr; - - WARN_ON(!system_supports_fpsimd()); -- WARN_ON(!have_cpu_fpsimd_context()); -+ WARN_ON(preemptible()); - - if (system_supports_sve() || system_supports_sme()) { - switch (current->thread.fp_type) { -@@ -467,7 +440,7 @@ static void fpsimd_save(void) - unsigned int vl; - - WARN_ON(!system_supports_fpsimd()); -- WARN_ON(!have_cpu_fpsimd_context()); -+ WARN_ON(preemptible()); - - if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - return; -@@ -1583,7 +1556,7 @@ void fpsimd_thread_switch(struct task_struct *next) - if (!system_supports_fpsimd()) - return; - -- __get_cpu_fpsimd_context(); -+ WARN_ON_ONCE(!irqs_disabled()); - - /* Save unsaved fpsimd state, if any: */ - fpsimd_save(); -@@ -1599,8 +1572,6 @@ void fpsimd_thread_switch(struct task_struct *next) - - update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, - wrong_task || wrong_cpu); -- -- __put_cpu_fpsimd_context(); - } - - 
static void fpsimd_flush_thread_vl(enum vec_type type) -@@ -1892,13 +1863,15 @@ static void fpsimd_flush_cpu_state(void) - */ - void fpsimd_save_and_flush_cpu_state(void) - { -+ unsigned long flags; -+ - if (!system_supports_fpsimd()) - return; - WARN_ON(preemptible()); -- __get_cpu_fpsimd_context(); -+ local_irq_save(flags); - fpsimd_save(); - fpsimd_flush_cpu_state(); -- __put_cpu_fpsimd_context(); -+ local_irq_restore(flags); - } - - #ifdef CONFIG_KERNEL_MODE_NEON --- -2.43.0 - diff --git a/queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch b/queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch deleted file mode 100644 index dca376b7bea..00000000000 --- a/queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch +++ /dev/null @@ -1,251 +0,0 @@ -From bf3880ed007142c4d7ff620fb293c8c2becd58ce Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 8 Dec 2023 12:32:21 +0100 -Subject: arm64: fpsimd: Preserve/restore kernel mode NEON at context switch - -From: Ard Biesheuvel - -[ Upstream commit aefbab8e77eb16b56e18f24b85a09ebf4dc60e93 ] - -Currently, the FPSIMD register file is not preserved and restored along -with the general registers on exception entry/exit or context switch. -For this reason, we disable preemption when enabling FPSIMD for kernel -mode use in task context, and suspend the processing of softirqs so that -there are no concurrent uses in the kernel. (Kernel mode FPSIMD may not -be used at all in other contexts). - -Disabling preemption while doing CPU intensive work on inputs of -potentially unbounded size is bad for real-time performance, which is -why we try and ensure that SIMD crypto code does not operate on more -than ~4k at a time, which is an arbitrary limit and requires assembler -code to implement efficiently. - -We can avoid the need for disabling preemption if we can ensure that any -in-kernel users of the NEON will not lose the FPSIMD register state -across a context switch. 
And given that disabling softirqs implicitly -disables preemption as well, we will also have to ensure that a softirq -that runs code using FPSIMD can safely interrupt an in-kernel user. - -So introduce a thread_info flag TIF_KERNEL_FPSTATE, and modify the -context switch hook for FPSIMD to preserve and restore the kernel mode -FPSIMD to/from struct thread_struct when it is set. This avoids any -scheduling blackouts due to prolonged use of FPSIMD in kernel mode, -without the need for manual yielding. - -In order to support softirq processing while FPSIMD is being used in -kernel task context, use the same flag to decide whether the kernel mode -FPSIMD state needs to be preserved and restored before allowing FPSIMD -to be used in softirq context. - -Signed-off-by: Ard Biesheuvel -Reviewed-by: Mark Brown -Reviewed-by: Mark Rutland -Link: https://lore.kernel.org/r/20231208113218.3001940-8-ardb@google.com -Signed-off-by: Will Deacon -Stable-dep-of: b8995a184170 ("Revert "arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD"") -Signed-off-by: Sasha Levin ---- - arch/arm64/include/asm/processor.h | 2 + - arch/arm64/include/asm/thread_info.h | 1 + - arch/arm64/kernel/fpsimd.c | 92 ++++++++++++++++++++++------ - 3 files changed, 77 insertions(+), 18 deletions(-) - -diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h -index e5bc54522e711..ce6eebd6c08bd 100644 ---- a/arch/arm64/include/asm/processor.h -+++ b/arch/arm64/include/asm/processor.h -@@ -167,6 +167,8 @@ struct thread_struct { - unsigned long fault_address; /* fault info */ - unsigned long fault_code; /* ESR_EL1 value */ - struct debug_info debug; /* debugging */ -+ -+ struct user_fpsimd_state kernel_fpsimd_state; - #ifdef CONFIG_ARM64_PTR_AUTH - struct ptrauth_keys_user keys_user; - #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL -diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h -index 553d1bc559c60..e72a3bf9e5634 100644 ---- 
a/arch/arm64/include/asm/thread_info.h -+++ b/arch/arm64/include/asm/thread_info.h -@@ -80,6 +80,7 @@ void arch_setup_new_exec(void); - #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ - #define TIF_SME 27 /* SME in use */ - #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ -+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */ - - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c -index b805bdab284c4..aa695057c93dd 100644 ---- a/arch/arm64/kernel/fpsimd.c -+++ b/arch/arm64/kernel/fpsimd.c -@@ -357,6 +357,7 @@ static void task_fpsimd_load(void) - - WARN_ON(!system_supports_fpsimd()); - WARN_ON(preemptible()); -+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE)); - - if (system_supports_sve() || system_supports_sme()) { - switch (current->thread.fp_type) { -@@ -379,7 +380,7 @@ static void task_fpsimd_load(void) - default: - /* - * This indicates either a bug in -- * fpsimd_save() or memory corruption, we -+ * fpsimd_save_user_state() or memory corruption, we - * should always record an explicit format - * when we save. We always at least have the - * memory allocated for FPSMID registers so -@@ -430,7 +431,7 @@ static void task_fpsimd_load(void) - * than via current, if we are saving KVM state then it will have - * ensured that the type of registers to save is set in last->to_save. 
- */ --static void fpsimd_save(void) -+static void fpsimd_save_user_state(void) - { - struct cpu_fp_state const *last = - this_cpu_ptr(&fpsimd_last_state); -@@ -863,7 +864,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, - if (task == current) { - get_cpu_fpsimd_context(); - -- fpsimd_save(); -+ fpsimd_save_user_state(); - } - - fpsimd_flush_task_state(task); -@@ -1549,6 +1550,16 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs) - current); - } - -+static void fpsimd_load_kernel_state(struct task_struct *task) -+{ -+ fpsimd_load_state(&task->thread.kernel_fpsimd_state); -+} -+ -+static void fpsimd_save_kernel_state(struct task_struct *task) -+{ -+ fpsimd_save_state(&task->thread.kernel_fpsimd_state); -+} -+ - void fpsimd_thread_switch(struct task_struct *next) - { - bool wrong_task, wrong_cpu; -@@ -1559,19 +1570,28 @@ void fpsimd_thread_switch(struct task_struct *next) - WARN_ON_ONCE(!irqs_disabled()); - - /* Save unsaved fpsimd state, if any: */ -- fpsimd_save(); -+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) -+ fpsimd_save_kernel_state(current); -+ else -+ fpsimd_save_user_state(); - -- /* -- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's -- * state. For kernel threads, FPSIMD registers are never loaded -- * and wrong_task and wrong_cpu will always be true. -- */ -- wrong_task = __this_cpu_read(fpsimd_last_state.st) != -- &next->thread.uw.fpsimd_state; -- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); -+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { -+ fpsimd_load_kernel_state(next); -+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); -+ } else { -+ /* -+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's -+ * state. For kernel threads, FPSIMD registers are never -+ * loaded with user mode FPSIMD state and so wrong_task and -+ * wrong_cpu will always be true. 
-+ */ -+ wrong_task = __this_cpu_read(fpsimd_last_state.st) != -+ &next->thread.uw.fpsimd_state; -+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); - -- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, -- wrong_task || wrong_cpu); -+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, -+ wrong_task || wrong_cpu); -+ } - } - - static void fpsimd_flush_thread_vl(enum vec_type type) -@@ -1661,7 +1681,7 @@ void fpsimd_preserve_current_state(void) - return; - - get_cpu_fpsimd_context(); -- fpsimd_save(); -+ fpsimd_save_user_state(); - put_cpu_fpsimd_context(); - } - -@@ -1869,7 +1889,7 @@ void fpsimd_save_and_flush_cpu_state(void) - return; - WARN_ON(preemptible()); - local_irq_save(flags); -- fpsimd_save(); -+ fpsimd_save_user_state(); - fpsimd_flush_cpu_state(); - local_irq_restore(flags); - } -@@ -1903,10 +1923,37 @@ void kernel_neon_begin(void) - get_cpu_fpsimd_context(); - - /* Save unsaved fpsimd state, if any: */ -- fpsimd_save(); -+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) { -+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); -+ fpsimd_save_kernel_state(current); -+ } else { -+ fpsimd_save_user_state(); -+ -+ /* -+ * Set the thread flag so that the kernel mode FPSIMD state -+ * will be context switched along with the rest of the task -+ * state. -+ * -+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel -+ * mode FPSIMD, but the task will not be preemptible so setting -+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it -+ * would mark the task context FPSIMD state as requiring a -+ * context switch) and unnecessary. -+ * -+ * On PREEMPT_RT, softirqs are serviced from a separate thread, -+ * which is scheduled as usual, and this guarantees that these -+ * softirqs are not interrupting use of the FPSIMD in kernel -+ * mode in task context. So in this case, setting the flag here -+ * is always appropriate. 
-+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) -+ set_thread_flag(TIF_KERNEL_FPSTATE); -+ } - - /* Invalidate any task state remaining in the fpsimd regs: */ - fpsimd_flush_cpu_state(); -+ -+ put_cpu_fpsimd_context(); - } - EXPORT_SYMBOL_GPL(kernel_neon_begin); - -@@ -1924,7 +1971,16 @@ void kernel_neon_end(void) - if (!system_supports_fpsimd()) - return; - -- put_cpu_fpsimd_context(); -+ /* -+ * If we are returning from a nested use of kernel mode FPSIMD, restore -+ * the task context kernel mode FPSIMD state. This can only happen when -+ * running in softirq context on non-PREEMPT_RT. -+ */ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && -+ test_thread_flag(TIF_KERNEL_FPSTATE)) -+ fpsimd_load_kernel_state(current); -+ else -+ clear_thread_flag(TIF_KERNEL_FPSTATE); - } - EXPORT_SYMBOL_GPL(kernel_neon_end); - --- -2.43.0 - diff --git a/queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch b/queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch new file mode 100644 index 00000000000..96f85f9573b --- /dev/null +++ b/queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch @@ -0,0 +1,39 @@ +From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001 +From: Hagar Hemdan +Date: Tue, 23 Apr 2024 13:59:26 +0000 +Subject: efi: libstub: only free priv.runtime_map when allocated + +From: Hagar Hemdan + +commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream. + +priv.runtime_map is only allocated when efi_novamap is not set. +Otherwise, it is an uninitialized value. In the error path, it is freed +unconditionally. Avoid passing an uninitialized value to free_pool. +Free priv.runtime_map only when it was allocated. + +This bug was discovered and resolved using Coverity Static Analysis +Security Testing (SAST) by Synopsys, Inc. 
+ +Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map") +Cc: +Signed-off-by: Hagar Hemdan +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/fdt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/firmware/efi/libstub/fdt.c ++++ b/drivers/firmware/efi/libstub/fdt.c +@@ -335,8 +335,8 @@ fail_free_new_fdt: + + fail: + efi_free(fdt_size, fdt_addr); +- +- efi_bs_call(free_pool, priv.runtime_map); ++ if (!efi_novamap) ++ efi_bs_call(free_pool, priv.runtime_map); + + return EFI_LOAD_ERROR; + } diff --git a/queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch new file mode 100644 index 00000000000..c6c9cd45639 --- /dev/null +++ b/queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch @@ -0,0 +1,123 @@ +From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001 +From: Dongli Zhang +Date: Wed, 22 May 2024 15:02:18 -0700 +Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline + +From: Dongli Zhang + +commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream. + +The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of +interrupt affinity reconfiguration via procfs. Instead, the change is +deferred until the next instance of the interrupt being triggered on the +original CPU. + +When the interrupt next triggers on the original CPU, the new affinity is +enforced within __irq_move_irq(). A vector is allocated from the new CPU, +but the old vector on the original CPU remains and is not immediately +reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming +process is delayed until the next trigger of the interrupt on the new CPU. 
+ +Upon the subsequent triggering of the interrupt on the new CPU, +irq_complete_move() adds a task to the old CPU's vector_cleanup list if it +remains online. Subsequently, the timer on the old CPU iterates over its +vector_cleanup list, reclaiming old vectors. + +However, a rare scenario arises if the old CPU is outgoing before the +interrupt triggers again on the new CPU. + +In that case irq_force_complete_move() is not invoked on the outgoing CPU +to reclaim the old apicd->prev_vector because the interrupt isn't currently +affine to the outgoing CPU, and irq_needs_fixup() returns false. Even +though __vector_schedule_cleanup() is later called on the new CPU, it +doesn't reclaim apicd->prev_vector; instead, it simply resets both +apicd->move_in_progress and apicd->prev_vector to 0. + +As a result, the vector remains unreclaimed in vector_matrix, leading to a +CPU vector leak. + +To address this issue, move the invocation of irq_force_complete_move() +before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the +interrupt is currently or used to be affine to the outgoing CPU. + +Additionally, reclaim the vector in __vector_schedule_cleanup() as well, +following a warning message, although theoretically it should never see +apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU. 
+ +Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress") +Signed-off-by: Dongli Zhang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/apic/vector.c | 9 ++++++--- + kernel/irq/cpuhotplug.c | 16 ++++++++-------- + 2 files changed, 14 insertions(+), 11 deletions(-) + +--- a/arch/x86/kernel/apic/vector.c ++++ b/arch/x86/kernel/apic/vector.c +@@ -1036,7 +1036,8 @@ static void __vector_schedule_cleanup(st + add_timer_on(&cl->timer, cpu); + } + } else { +- apicd->prev_vector = 0; ++ pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); ++ free_moved_vector(apicd); + } + raw_spin_unlock(&vector_lock); + } +@@ -1073,6 +1074,7 @@ void irq_complete_move(struct irq_cfg *c + */ + void irq_force_complete_move(struct irq_desc *desc) + { ++ unsigned int cpu = smp_processor_id(); + struct apic_chip_data *apicd; + struct irq_data *irqd; + unsigned int vector; +@@ -1097,10 +1099,11 @@ void irq_force_complete_move(struct irq_ + goto unlock; + + /* +- * If prev_vector is empty, no action required. ++ * If prev_vector is empty or the descriptor is neither currently ++ * nor previously on the outgoing CPU no action required. + */ + vector = apicd->prev_vector; +- if (!vector) ++ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) + goto unlock; + + /* +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d + } + + /* ++ * Complete an eventually pending irq move cleanup. If this ++ * interrupt was moved in hard irq context, then the vectors need ++ * to be cleaned up. It can't wait until this interrupt actually ++ * happens and this CPU was involved. 
++ */ ++ irq_force_complete_move(desc); ++ ++ /* + * No move required, if: + * - Interrupt is per cpu + * - Interrupt is not started +@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d + } + + /* +- * Complete an eventually pending irq move cleanup. If this +- * interrupt was moved in hard irq context, then the vectors need +- * to be cleaned up. It can't wait until this interrupt actually +- * happens and this CPU was involved. +- */ +- irq_force_complete_move(desc); +- +- /* + * If there is a setaffinity pending, then try to reuse the pending + * mask, so the last change of the affinity does not get lost. If + * there is no move pending or the pending mask does not contain diff --git a/queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch new file mode 100644 index 00000000000..c7b03d69f83 --- /dev/null +++ b/queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch @@ -0,0 +1,74 @@ +From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 13 Mar 2024 13:58:42 +0100 +Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID + +From: Gerd Hoffmann + +commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream. + +Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16]) +to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths +to userspace via KVM_GET_SUPPORTED_CPUID. + +Per AMD, GuestPhysBits is intended for software use, and physical CPUs do +not set that field. I.e. GuestPhysBits will be non-zero if and only if +KVM is running as a nested hypervisor, and in that case, GuestPhysBits is +NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with +TDP enabled. + +E.g. 
KVM will soon use GuestPhysBits to communicate the CPU's maximum +*addressable* guest physical address, which would result in KVM under- +reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52, +but without 5-level paging. + +Signed-off-by: Gerd Hoffmann +Cc: stable@vger.kernel.org +Reviewed-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com +[sean: rewrite changelog with --verbose, Cc stable@] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1212,9 +1212,8 @@ static inline int __do_cpuid_func(struct + entry->eax = entry->ebx = entry->ecx = 0; + break; + case 0x80000008: { +- unsigned g_phys_as = (entry->eax >> 16) & 0xff; +- unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); +- unsigned phys_as = entry->eax & 0xff; ++ unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U); ++ unsigned int phys_as; + + /* + * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as +@@ -1222,16 +1221,16 @@ static inline int __do_cpuid_func(struct + * reductions in MAXPHYADDR for memory encryption affect shadow + * paging, too. + * +- * If TDP is enabled but an explicit guest MAXPHYADDR is not +- * provided, use the raw bare metal MAXPHYADDR as reductions to +- * the HPAs do not affect GPAs. ++ * If TDP is enabled, use the raw bare metal MAXPHYADDR as ++ * reductions to the HPAs do not affect GPAs. 
+ */ +- if (!tdp_enabled) +- g_phys_as = boot_cpu_data.x86_phys_bits; +- else if (!g_phys_as) +- g_phys_as = phys_as; ++ if (!tdp_enabled) { ++ phys_as = boot_cpu_data.x86_phys_bits; ++ } else { ++ phys_as = entry->eax & 0xff; ++ } + +- entry->eax = g_phys_as | (virt_as << 8); ++ entry->eax = phys_as | (virt_as << 8); + entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); + entry->edx = 0; + cpuid_entry_override(entry, CPUID_8000_0008_EBX); diff --git a/queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch b/queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch new file mode 100644 index 00000000000..1bf9f5caf1b --- /dev/null +++ b/queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch @@ -0,0 +1,42 @@ +From 2920141fc149f71bad22361946417bc43783ed7f Mon Sep 17 00:00:00 2001 +From: Srinivas Pandruvada +Date: Tue, 23 Apr 2024 13:46:10 -0700 +Subject: platform/x86/intel/tpmi: Handle error from tpmi_process_info() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Srinivas Pandruvada + +commit 2920141fc149f71bad22361946417bc43783ed7f upstream. + +When tpmi_process_info() returns error, fail to load the driver. +This can happen if call to ioremap() returns error. + +Signed-off-by: Srinivas Pandruvada +Reviewed-by: Ilpo Järvinen +Cc: stable@vger.kernel.org # v6.3+ +Link: https://lore.kernel.org/r/20240423204619.3946901-2-srinivas.pandruvada@linux.intel.com +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/tpmi.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/platform/x86/intel/tpmi.c ++++ b/drivers/platform/x86/intel/tpmi.c +@@ -733,8 +733,11 @@ static int intel_vsec_tpmi_init(struct a + * when actual device nodes created outside this + * loop via tpmi_create_devices(). 
+ */ +- if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID) +- tpmi_process_info(tpmi_info, pfs); ++ if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID) { ++ ret = tpmi_process_info(tpmi_info, pfs); ++ if (ret) ++ return ret; ++ } + + if (pfs->pfs_header.tpmi_id == TPMI_CONTROL_ID) + tpmi_set_control_base(auxdev, tpmi_info, pfs); diff --git a/queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch b/queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch new file mode 100644 index 00000000000..563892e34c0 --- /dev/null +++ b/queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch @@ -0,0 +1,50 @@ +From db643cb7ebe524d17b4b13583dda03485d4a1bc0 Mon Sep 17 00:00:00 2001 +From: Srinivas Pandruvada +Date: Mon, 15 Apr 2024 14:52:10 -0700 +Subject: platform/x86/intel-uncore-freq: Don't present root domain on error + +From: Srinivas Pandruvada + +commit db643cb7ebe524d17b4b13583dda03485d4a1bc0 upstream. + +If none of the clusters are added because of some error, fail to load +driver without presenting root domain. In this case root domain will +present invalid data. 
+ +Signed-off-by: Srinivas Pandruvada +Fixes: 01c10f88c9b7 ("platform/x86/intel-uncore-freq: tpmi: Provide cluster level control") +Cc: stable@vger.kernel.org # 6.5+ +Link: https://lore.kernel.org/r/20240415215210.2824868-1-srinivas.pandruvada@linux.intel.com +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c ++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +@@ -234,6 +234,7 @@ static int uncore_probe(struct auxiliary + { + struct intel_tpmi_plat_info *plat_info; + struct tpmi_uncore_struct *tpmi_uncore; ++ bool uncore_sysfs_added = false; + int ret, i, pkg = 0; + int num_resources; + +@@ -359,9 +360,15 @@ static int uncore_probe(struct auxiliary + } + /* Point to next cluster offset */ + cluster_offset >>= UNCORE_MAX_CLUSTER_PER_DOMAIN; ++ uncore_sysfs_added = true; + } + } + ++ if (!uncore_sysfs_added) { ++ ret = -ENODEV; ++ goto remove_clusters; ++ } ++ + auxiliary_set_drvdata(auxdev, tpmi_uncore); + + tpmi_uncore->root_cluster.root_domain = true; diff --git a/queue-6.6/series b/queue-6.6/series index 665d092ca6f..8b647186560 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -548,8 +548,6 @@ iio-accel-mxc4005-allow-module-autoloading-via-of-co.patch iio-accel-mxc4005-reset-chip-on-probe-and-resume.patch misc-pvpanic-deduplicate-common-code.patch misc-pvpanic-pci-register-attributes-via-pci_driver.patch -arm64-fpsimd-drop-unneeded-busy-flag.patch -arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch arm64-fpsimd-implement-lazy-restore-for-kernel-mode-.patch revert-arm64-fpsimd-implement-lazy-restore-for-kerne.patch arm64-fpsimd-avoid-erroneous-elide-of-user-state-rel.patch @@ -741,3 +739,10 @@ hwmon-shtc1-fix-property-misspelling.patch riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch 
alsa-seq-ump-fix-swapped-song-position-pointer-data.patch alsa-timer-set-lower-bound-of-start-tick-time.patch +x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch +efi-libstub-only-free-priv.runtime_map-when-allocated.patch +x86-pci-skip-early-e820-check-for-ecam-region.patch +kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch +genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch +platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch +platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch diff --git a/queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch b/queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch new file mode 100644 index 00000000000..81ddf450ce8 --- /dev/null +++ b/queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch @@ -0,0 +1,107 @@ +From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 16 May 2024 11:05:42 +0200 +Subject: x86/efistub: Omit physical KASLR when memory reservations exist + +From: Ard Biesheuvel + +commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream. + +The legacy decompressor has elaborate logic to ensure that the +randomized physical placement of the decompressed kernel image does not +conflict with any memory reservations, including ones specified on the +command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are +taken into account by the kernel proper at a later stage. + +When booting in EFI mode, it is the firmware's job to ensure that the +chosen range does not conflict with any memory reservations that it +knows about, and this is trivially achieved by using the firmware's +memory allocation APIs. + +That leaves reservations specified on the command line, though, which +the firmware knows nothing about, as these regions have no other special +significance to the platform. 
Since commit + + a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") + +these reservations are not taken into account when randomizing the +physical placement, which may result in conflicts where the memory +cannot be reserved by the kernel proper because its own executable image +resides there. + +To avoid having to duplicate or reuse the existing complicated logic, +disable physical KASLR entirely when such overrides are specified. These +are mostly diagnostic tools or niche features, and physical KASLR (as +opposed to virtual KASLR, which is much more important as it affects the +memory addresses observed by code executing in the kernel) is something +we can live without. + +Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com +Reported-by: Ben Chaney +Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") +Cc: stable@vger.kernel.org # v6.1+ +Reviewed-by: Kees Cook +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/x86-stub.c | 28 ++++++++++++++++++++++++++-- + 1 file changed, 26 insertions(+), 2 deletions(-) + +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -776,6 +776,26 @@ static void error(char *str) + efi_warn("Decompression failed: %s\n", str); + } + ++static const char *cmdline_memmap_override; ++ ++static efi_status_t parse_options(const char *cmdline) ++{ ++ static const char opts[][14] = { ++ "mem=", "memmap=", "efi_fake_mem=", "hugepages=" ++ }; ++ ++ for (int i = 0; i < ARRAY_SIZE(opts); i++) { ++ const char *p = strstr(cmdline, opts[i]); ++ ++ if (p == cmdline || (p > cmdline && isspace(p[-1]))) { ++ cmdline_memmap_override = opts[i]; ++ break; ++ } ++ } ++ ++ return efi_parse_options(cmdline); ++} ++ + static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + { + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; +@@ -807,6 +827,10 @@ static efi_status_t efi_decompress_kerne + 
!memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; ++ } else if (cmdline_memmap_override) { ++ efi_info("%s detected on the kernel command line - disabling physical KASLR\n", ++ cmdline_memmap_override); ++ seed[0] = 0; + } + } + +@@ -881,7 +905,7 @@ void __noreturn efi_stub_entry(efi_handl + } + + #ifdef CONFIG_CMDLINE_BOOL +- status = efi_parse_options(CONFIG_CMDLINE); ++ status = parse_options(CONFIG_CMDLINE); + if (status != EFI_SUCCESS) { + efi_err("Failed to parse options\n"); + goto fail; +@@ -890,7 +914,7 @@ void __noreturn efi_stub_entry(efi_handl + if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) { + unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr | + ((u64)boot_params->ext_cmd_line_ptr << 32)); +- status = efi_parse_options((char *)cmdline_paddr); ++ status = parse_options((char *)cmdline_paddr); + if (status != EFI_SUCCESS) { + efi_err("Failed to parse options\n"); + goto fail; diff --git a/queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch b/queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch new file mode 100644 index 00000000000..5cae82a2495 --- /dev/null +++ b/queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch @@ -0,0 +1,131 @@ +From 199f968f1484a14024d0d467211ffc2faf193eb4 Mon Sep 17 00:00:00 2001 +From: Bjorn Helgaas +Date: Wed, 17 Apr 2024 15:40:12 -0500 +Subject: x86/pci: Skip early E820 check for ECAM region + +From: Bjorn Helgaas + +commit 199f968f1484a14024d0d467211ffc2faf193eb4 upstream. + +Arul, Mateusz, Imcarneiro91, and Aman reported a regression caused by +07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map"). On the +Lenovo Legion 9i laptop, that commit removes the ECAM area from E820, which +means the early E820 validation fails, which means we don't enable ECAM in +the "early MCFG" path. + +The static MCFG table describes ECAM without depending on the ACPI +interpreter. 
Many Legion 9i ACPI methods rely on that, so they fail when +PCI config access isn't available, resulting in the embedded controller, +PS/2, audio, trackpad, and battery devices not being detected. The _OSC +method also fails, so Linux can't take control of the PCIe hotplug, PME, +and AER features: + + # pci_mmcfg_early_init() + + PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0] + PCI: not using ECAM ([mem 0xc0000000-0xce0fffff] not reserved) + + ACPI Error: AE_ERROR, Returned by Handler for [PCI_Config] (20230628/evregion-300) + ACPI: Interpreter enabled + ACPI: Ignoring error and continuing table load + ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162) + ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220) + ACPI: Skipping parse of AML opcode: OpcodeName unavailable (0x0010) + ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162) + ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220) + ... + ACPI Error: Aborting method \_SB.PC00._OSC due to previous error (AE_NOT_FOUND) (20230628/psparse-529) + acpi PNP0A08:00: _OSC: platform retains control of PCIe features (AE_NOT_FOUND) + + # pci_mmcfg_late_init() + + PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0] + PCI: [Firmware Info]: ECAM [mem 0xc0000000-0xce0fffff] not reserved in ACPI motherboard resources + PCI: ECAM [mem 0xc0000000-0xce0fffff] is EfiMemoryMappedIO; assuming valid + PCI: ECAM [mem 0xc0000000-0xce0fffff] reserved to work around lack of ACPI motherboard _CRS + +Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be reserved by a PNP0C02 +resource, but there's no requirement to mention it in E820, so we shouldn't +look at E820 to validate the ECAM space described by MCFG. 
+ +In 2006, 946f2ee5c731 ("[PATCH] i386/x86-64: Check that MCFG points to an +e820 reserved area") added a sanity check of E820 to work around buggy MCFG +tables, but that over-aggressive validation causes failures like this one. + +Keep the E820 validation check for machines older than 2016, an arbitrary +ten years after 946f2ee5c731, so machines that depend on it don't break. + +Skip the early E820 check for 2016 and newer BIOSes since there's no +requirement to describe ECAM in E820. + +Link: https://lore.kernel.org/r/20240417204012.215030-2-helgaas@kernel.org +Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map") +Reported-by: Mateusz Kaduk +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218444 +Signed-off-by: Bjorn Helgaas +Tested-by: Mateusz Kaduk +Reviewed-by: Andy Shevchenko +Reviewed-by: Hans de Goede +Reviewed-by: Kuppuswamy Sathyanarayanan +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/pci/mmconfig-shared.c | 40 +++++++++++++++++++++++++++++----------- + 1 file changed, 29 insertions(+), 11 deletions(-) + +--- a/arch/x86/pci/mmconfig-shared.c ++++ b/arch/x86/pci/mmconfig-shared.c +@@ -527,7 +527,34 @@ pci_mmcfg_check_reserved(struct device * + { + struct resource *conflict; + +- if (!early && !acpi_disabled) { ++ if (early) { ++ ++ /* ++ * Don't try to do this check unless configuration type 1 ++ * is available. How about type 2? ++ */ ++ ++ /* ++ * 946f2ee5c731 ("Check that MCFG points to an e820 ++ * reserved area") added this E820 check in 2006 to work ++ * around BIOS defects. ++ * ++ * Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be ++ * reserved by a PNP0C02 resource, but it need not be ++ * mentioned in E820. Before the ACPI interpreter is ++ * available, we can't check for PNP0C02 resources, so ++ * there's no reliable way to verify the region in this ++ * early check. Keep it only for the old machines that ++ * motivated 946f2ee5c731. 
++ */ ++ if (dmi_get_bios_year() < 2016 && raw_pci_ops) ++ return is_mmconf_reserved(e820__mapped_all, cfg, dev, ++ "E820 entry"); ++ ++ return true; ++ } ++ ++ if (!acpi_disabled) { + if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, + "ACPI motherboard resource")) + return true; +@@ -563,16 +590,7 @@ pci_mmcfg_check_reserved(struct device * + * For MCFG information constructed from hotpluggable host bridge's + * _CBA method, just assume it's reserved. + */ +- if (pci_mmcfg_running_state) +- return true; +- +- /* Don't try to do this check unless configuration +- type 1 is available. how about type 2 ?*/ +- if (raw_pci_ops) +- return is_mmconf_reserved(e820__mapped_all, cfg, dev, +- "E820 entry"); +- +- return false; ++ return pci_mmcfg_running_state; + } + + static void __init pci_mmcfg_reject_broken(int early) -- 2.47.3