git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2024 13:13:39 +0000 (15:13 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2024 13:13:39 +0000 (15:13 +0200)
added patches:
efi-libstub-only-free-priv.runtime_map-when-allocated.patch
genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
x86-pci-skip-early-e820-check-for-ecam-region.patch

queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch [deleted file]
queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch [deleted file]
queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch [new file with mode: 0644]
queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch [new file with mode: 0644]
queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch [new file with mode: 0644]
queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch [new file with mode: 0644]
queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch [new file with mode: 0644]
queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch [new file with mode: 0644]

diff --git a/queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch b/queue-6.6/arm64-fpsimd-drop-unneeded-busy-flag.patch
deleted file mode 100644 (file)
index bb2e8c8..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-From 37f2773a1ef05374538d5e4ed26cbacebe363241 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 8 Dec 2023 12:32:20 +0100
-Subject: arm64: fpsimd: Drop unneeded 'busy' flag
-
-From: Ard Biesheuvel <ardb@kernel.org>
-
-[ Upstream commit 9b19700e623f96222c69ecb2adecb1a3e3664cc0 ]
-
-Kernel mode NEON will preserve the user mode FPSIMD state by saving it
-into the task struct before clobbering the registers. In order to avoid
-the need for preserving kernel mode state too, we disallow nested use of
-kernel mode NEON, i.e., use in softirq context while the interrupted
-task context was using kernel mode NEON too.
-
-Originally, this policy was implemented using a per-CPU flag which was
-exposed via may_use_simd(), requiring the users of the kernel mode NEON
-to deal with the possibility that it might return false, and having NEON
-and non-NEON code paths. This policy was changed by commit
-13150149aa6ded1 ("arm64: fpsimd: run kernel mode NEON with softirqs
-disabled"), and now, softirq processing is disabled entirely instead,
-and so may_use_simd() can never fail when called from task or softirq
-context.
-
-This means we can drop the fpsimd_context_busy flag entirely, and
-instead, ensure that we disable softirq processing in places where we
-formerly relied on the flag for preventing races in the FPSIMD preserve
-routines.
-
-Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
-Reviewed-by: Mark Brown <broonie@kernel.org>
-Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
-Link: https://lore.kernel.org/r/20231208113218.3001940-7-ardb@google.com
-[will: Folded in fix from CAMj1kXFhzbJRyWHELCivQW1yJaF=p07LLtbuyXYX3G1WtsdyQg@mail.gmail.com]
-Signed-off-by: Will Deacon <will@kernel.org>
-Stable-dep-of: b8995a184170 ("Revert "arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD"")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/arm64/include/asm/simd.h | 11 +------
- arch/arm64/kernel/fpsimd.c    | 55 +++++++++--------------------------
- 2 files changed, 15 insertions(+), 51 deletions(-)
-
-diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
-index 6a75d7ecdcaa2..8e86c9e70e483 100644
---- a/arch/arm64/include/asm/simd.h
-+++ b/arch/arm64/include/asm/simd.h
-@@ -12,8 +12,6 @@
- #include <linux/preempt.h>
- #include <linux/types.h>
--DECLARE_PER_CPU(bool, fpsimd_context_busy);
--
- #ifdef CONFIG_KERNEL_MODE_NEON
- /*
-@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
-       /*
-        * We must make sure that the SVE has been initialized properly
-        * before using the SIMD in kernel.
--       * fpsimd_context_busy is only set while preemption is disabled,
--       * and is clear whenever preemption is enabled. Since
--       * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
--       * cannot change under our feet -- if it's set we cannot be
--       * migrated, and if it's clear we cannot be migrated to a CPU
--       * where it is set.
-        */
-       return !WARN_ON(!system_capabilities_finalized()) &&
-              system_supports_fpsimd() &&
--             !in_hardirq() && !irqs_disabled() && !in_nmi() &&
--             !this_cpu_read(fpsimd_context_busy);
-+             !in_hardirq() && !irqs_disabled() && !in_nmi();
- }
- #else /* ! CONFIG_KERNEL_MODE_NEON */
-diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
-index 5cdfcc9e3e54b..b805bdab284c4 100644
---- a/arch/arm64/kernel/fpsimd.c
-+++ b/arch/arm64/kernel/fpsimd.c
-@@ -85,13 +85,13 @@
-  * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
-  * flag the register state as invalid.
-  *
-- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
-- * save the task's FPSIMD context back to task_struct from softirq context.
-- * To prevent this from racing with the manipulation of the task's FPSIMD state
-- * from task context and thereby corrupting the state, it is necessary to
-- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
-- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
-- * run but prevent them to use FPSIMD.
-+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
-+ * called from softirq context, which will save the task's FPSIMD context back
-+ * to task_struct. To prevent this from racing with the manipulation of the
-+ * task's FPSIMD state from task context and thereby corrupting the state, it
-+ * is necessary to protect any manipulation of a task's fpsimd_state or
-+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
-+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
-  *
-  * For a certain task, the sequence may look something like this:
-  * - the task gets scheduled in; if both the task's fpsimd_cpu field
-@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
- #endif
--DEFINE_PER_CPU(bool, fpsimd_context_busy);
--EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
--
- static void fpsimd_bind_task_to_cpu(void);
--static void __get_cpu_fpsimd_context(void)
--{
--      bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
--
--      WARN_ON(busy);
--}
--
- /*
-  * Claim ownership of the CPU FPSIMD context for use by the calling context.
-  *
-  * The caller may freely manipulate the FPSIMD context metadata until
-  * put_cpu_fpsimd_context() is called.
-  *
-- * The double-underscore version must only be called if you know the task
-- * can't be preempted.
-- *
-  * On RT kernels local_bh_disable() is not sufficient because it only
-  * serializes soft interrupt related sections via a local lock, but stays
-  * preemptible. Disabling preemption is the right choice here as bottom
-@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
-               local_bh_disable();
-       else
-               preempt_disable();
--      __get_cpu_fpsimd_context();
--}
--
--static void __put_cpu_fpsimd_context(void)
--{
--      bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
--
--      WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
- }
- /*
-@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
-  */
- static void put_cpu_fpsimd_context(void)
- {
--      __put_cpu_fpsimd_context();
-       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-               local_bh_enable();
-       else
-               preempt_enable();
- }
--static bool have_cpu_fpsimd_context(void)
--{
--      return !preemptible() && __this_cpu_read(fpsimd_context_busy);
--}
--
- unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
- {
-       return task->thread.vl[type];
-@@ -383,7 +356,7 @@ static void task_fpsimd_load(void)
-       bool restore_ffr;
-       WARN_ON(!system_supports_fpsimd());
--      WARN_ON(!have_cpu_fpsimd_context());
-+      WARN_ON(preemptible());
-       if (system_supports_sve() || system_supports_sme()) {
-               switch (current->thread.fp_type) {
-@@ -467,7 +440,7 @@ static void fpsimd_save(void)
-       unsigned int vl;
-       WARN_ON(!system_supports_fpsimd());
--      WARN_ON(!have_cpu_fpsimd_context());
-+      WARN_ON(preemptible());
-       if (test_thread_flag(TIF_FOREIGN_FPSTATE))
-               return;
-@@ -1583,7 +1556,7 @@ void fpsimd_thread_switch(struct task_struct *next)
-       if (!system_supports_fpsimd())
-               return;
--      __get_cpu_fpsimd_context();
-+      WARN_ON_ONCE(!irqs_disabled());
-       /* Save unsaved fpsimd state, if any: */
-       fpsimd_save();
-@@ -1599,8 +1572,6 @@ void fpsimd_thread_switch(struct task_struct *next)
-       update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
-                              wrong_task || wrong_cpu);
--
--      __put_cpu_fpsimd_context();
- }
- static void fpsimd_flush_thread_vl(enum vec_type type)
-@@ -1892,13 +1863,15 @@ static void fpsimd_flush_cpu_state(void)
-  */
- void fpsimd_save_and_flush_cpu_state(void)
- {
-+      unsigned long flags;
-+
-       if (!system_supports_fpsimd())
-               return;
-       WARN_ON(preemptible());
--      __get_cpu_fpsimd_context();
-+      local_irq_save(flags);
-       fpsimd_save();
-       fpsimd_flush_cpu_state();
--      __put_cpu_fpsimd_context();
-+      local_irq_restore(flags);
- }
- #ifdef CONFIG_KERNEL_MODE_NEON
--- 
-2.43.0
-
diff --git a/queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch b/queue-6.6/arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch
deleted file mode 100644 (file)
index dca376b..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-From bf3880ed007142c4d7ff620fb293c8c2becd58ce Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 8 Dec 2023 12:32:21 +0100
-Subject: arm64: fpsimd: Preserve/restore kernel mode NEON at context switch
-
-From: Ard Biesheuvel <ardb@kernel.org>
-
-[ Upstream commit aefbab8e77eb16b56e18f24b85a09ebf4dc60e93 ]
-
-Currently, the FPSIMD register file is not preserved and restored along
-with the general registers on exception entry/exit or context switch.
-For this reason, we disable preemption when enabling FPSIMD for kernel
-mode use in task context, and suspend the processing of softirqs so that
-there are no concurrent uses in the kernel. (Kernel mode FPSIMD may not
-be used at all in other contexts).
-
-Disabling preemption while doing CPU intensive work on inputs of
-potentially unbounded size is bad for real-time performance, which is
-why we try and ensure that SIMD crypto code does not operate on more
-than ~4k at a time, which is an arbitrary limit and requires assembler
-code to implement efficiently.
-
-We can avoid the need for disabling preemption if we can ensure that any
-in-kernel users of the NEON will not lose the FPSIMD register state
-across a context switch. And given that disabling softirqs implicitly
-disables preemption as well, we will also have to ensure that a softirq
-that runs code using FPSIMD can safely interrupt an in-kernel user.
-
-So introduce a thread_info flag TIF_KERNEL_FPSTATE, and modify the
-context switch hook for FPSIMD to preserve and restore the kernel mode
-FPSIMD to/from struct thread_struct when it is set. This avoids any
-scheduling blackouts due to prolonged use of FPSIMD in kernel mode,
-without the need for manual yielding.
-
-In order to support softirq processing while FPSIMD is being used in
-kernel task context, use the same flag to decide whether the kernel mode
-FPSIMD state needs to be preserved and restored before allowing FPSIMD
-to be used in softirq context.
-
-Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
-Reviewed-by: Mark Brown <broonie@kernel.org>
-Reviewed-by: Mark Rutland <mark.rutland@arm.com>
-Link: https://lore.kernel.org/r/20231208113218.3001940-8-ardb@google.com
-Signed-off-by: Will Deacon <will@kernel.org>
-Stable-dep-of: b8995a184170 ("Revert "arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD"")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/arm64/include/asm/processor.h   |  2 +
- arch/arm64/include/asm/thread_info.h |  1 +
- arch/arm64/kernel/fpsimd.c           | 92 ++++++++++++++++++++++------
- 3 files changed, 77 insertions(+), 18 deletions(-)
-
-diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
-index e5bc54522e711..ce6eebd6c08bd 100644
---- a/arch/arm64/include/asm/processor.h
-+++ b/arch/arm64/include/asm/processor.h
-@@ -167,6 +167,8 @@ struct thread_struct {
-       unsigned long           fault_address;  /* fault info */
-       unsigned long           fault_code;     /* ESR_EL1 value */
-       struct debug_info       debug;          /* debugging */
-+
-+      struct user_fpsimd_state        kernel_fpsimd_state;
- #ifdef CONFIG_ARM64_PTR_AUTH
-       struct ptrauth_keys_user        keys_user;
- #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
-diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
-index 553d1bc559c60..e72a3bf9e5634 100644
---- a/arch/arm64/include/asm/thread_info.h
-+++ b/arch/arm64/include/asm/thread_info.h
-@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
- #define TIF_TAGGED_ADDR               26      /* Allow tagged user addresses */
- #define TIF_SME                       27      /* SME in use */
- #define TIF_SME_VL_INHERIT    28      /* Inherit SME vl_onexec across exec */
-+#define TIF_KERNEL_FPSTATE    29      /* Task is in a kernel mode FPSIMD section */
- #define _TIF_SIGPENDING               (1 << TIF_SIGPENDING)
- #define _TIF_NEED_RESCHED     (1 << TIF_NEED_RESCHED)
-diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
-index b805bdab284c4..aa695057c93dd 100644
---- a/arch/arm64/kernel/fpsimd.c
-+++ b/arch/arm64/kernel/fpsimd.c
-@@ -357,6 +357,7 @@ static void task_fpsimd_load(void)
-       WARN_ON(!system_supports_fpsimd());
-       WARN_ON(preemptible());
-+      WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
-       if (system_supports_sve() || system_supports_sme()) {
-               switch (current->thread.fp_type) {
-@@ -379,7 +380,7 @@ static void task_fpsimd_load(void)
-               default:
-                       /*
-                        * This indicates either a bug in
--                       * fpsimd_save() or memory corruption, we
-+                       * fpsimd_save_user_state() or memory corruption, we
-                        * should always record an explicit format
-                        * when we save. We always at least have the
-                        * memory allocated for FPSMID registers so
-@@ -430,7 +431,7 @@ static void task_fpsimd_load(void)
-  * than via current, if we are saving KVM state then it will have
-  * ensured that the type of registers to save is set in last->to_save.
-  */
--static void fpsimd_save(void)
-+static void fpsimd_save_user_state(void)
- {
-       struct cpu_fp_state const *last =
-               this_cpu_ptr(&fpsimd_last_state);
-@@ -863,7 +864,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
-       if (task == current) {
-               get_cpu_fpsimd_context();
--              fpsimd_save();
-+              fpsimd_save_user_state();
-       }
-       fpsimd_flush_task_state(task);
-@@ -1549,6 +1550,16 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
-                      current);
- }
-+static void fpsimd_load_kernel_state(struct task_struct *task)
-+{
-+      fpsimd_load_state(&task->thread.kernel_fpsimd_state);
-+}
-+
-+static void fpsimd_save_kernel_state(struct task_struct *task)
-+{
-+      fpsimd_save_state(&task->thread.kernel_fpsimd_state);
-+}
-+
- void fpsimd_thread_switch(struct task_struct *next)
- {
-       bool wrong_task, wrong_cpu;
-@@ -1559,19 +1570,28 @@ void fpsimd_thread_switch(struct task_struct *next)
-       WARN_ON_ONCE(!irqs_disabled());
-       /* Save unsaved fpsimd state, if any: */
--      fpsimd_save();
-+      if (test_thread_flag(TIF_KERNEL_FPSTATE))
-+              fpsimd_save_kernel_state(current);
-+      else
-+              fpsimd_save_user_state();
--      /*
--       * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
--       * state.  For kernel threads, FPSIMD registers are never loaded
--       * and wrong_task and wrong_cpu will always be true.
--       */
--      wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
--                                      &next->thread.uw.fpsimd_state;
--      wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
-+      if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
-+              fpsimd_load_kernel_state(next);
-+              set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-+      } else {
-+              /*
-+               * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
-+               * state.  For kernel threads, FPSIMD registers are never
-+               * loaded with user mode FPSIMD state and so wrong_task and
-+               * wrong_cpu will always be true.
-+               */
-+              wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
-+                      &next->thread.uw.fpsimd_state;
-+              wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
--      update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
--                             wrong_task || wrong_cpu);
-+              update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
-+                                     wrong_task || wrong_cpu);
-+      }
- }
- static void fpsimd_flush_thread_vl(enum vec_type type)
-@@ -1661,7 +1681,7 @@ void fpsimd_preserve_current_state(void)
-               return;
-       get_cpu_fpsimd_context();
--      fpsimd_save();
-+      fpsimd_save_user_state();
-       put_cpu_fpsimd_context();
- }
-@@ -1869,7 +1889,7 @@ void fpsimd_save_and_flush_cpu_state(void)
-               return;
-       WARN_ON(preemptible());
-       local_irq_save(flags);
--      fpsimd_save();
-+      fpsimd_save_user_state();
-       fpsimd_flush_cpu_state();
-       local_irq_restore(flags);
- }
-@@ -1903,10 +1923,37 @@ void kernel_neon_begin(void)
-       get_cpu_fpsimd_context();
-       /* Save unsaved fpsimd state, if any: */
--      fpsimd_save();
-+      if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
-+              BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
-+              fpsimd_save_kernel_state(current);
-+      } else {
-+              fpsimd_save_user_state();
-+
-+              /*
-+               * Set the thread flag so that the kernel mode FPSIMD state
-+               * will be context switched along with the rest of the task
-+               * state.
-+               *
-+               * On non-PREEMPT_RT, softirqs may interrupt task level kernel
-+               * mode FPSIMD, but the task will not be preemptible so setting
-+               * TIF_KERNEL_FPSTATE for those would be both wrong (as it
-+               * would mark the task context FPSIMD state as requiring a
-+               * context switch) and unnecessary.
-+               *
-+               * On PREEMPT_RT, softirqs are serviced from a separate thread,
-+               * which is scheduled as usual, and this guarantees that these
-+               * softirqs are not interrupting use of the FPSIMD in kernel
-+               * mode in task context. So in this case, setting the flag here
-+               * is always appropriate.
-+               */
-+              if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
-+                      set_thread_flag(TIF_KERNEL_FPSTATE);
-+      }
-       /* Invalidate any task state remaining in the fpsimd regs: */
-       fpsimd_flush_cpu_state();
-+
-+      put_cpu_fpsimd_context();
- }
- EXPORT_SYMBOL_GPL(kernel_neon_begin);
-@@ -1924,7 +1971,16 @@ void kernel_neon_end(void)
-       if (!system_supports_fpsimd())
-               return;
--      put_cpu_fpsimd_context();
-+      /*
-+       * If we are returning from a nested use of kernel mode FPSIMD, restore
-+       * the task context kernel mode FPSIMD state. This can only happen when
-+       * running in softirq context on non-PREEMPT_RT.
-+       */
-+      if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
-+          test_thread_flag(TIF_KERNEL_FPSTATE))
-+              fpsimd_load_kernel_state(current);
-+      else
-+              clear_thread_flag(TIF_KERNEL_FPSTATE);
- }
- EXPORT_SYMBOL_GPL(kernel_neon_end);
--- 
-2.43.0
-
diff --git a/queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch b/queue-6.6/efi-libstub-only-free-priv.runtime_map-when-allocated.patch
new file mode 100644 (file)
index 0000000..96f85f9
--- /dev/null
@@ -0,0 +1,39 @@
+From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001
+From: Hagar Hemdan <hagarhem@amazon.com>
+Date: Tue, 23 Apr 2024 13:59:26 +0000
+Subject: efi: libstub: only free priv.runtime_map when allocated
+
+From: Hagar Hemdan <hagarhem@amazon.com>
+
+commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream.
+
+priv.runtime_map is only allocated when efi_novamap is not set.
+Otherwise, it is an uninitialized value.  In the error path, it is freed
+unconditionally.  Avoid passing an uninitialized value to free_pool.
+Free priv.runtime_map only when it was allocated.
+
+This bug was discovered and resolved using Coverity Static Analysis
+Security Testing (SAST) by Synopsys, Inc.
+
+Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/fdt.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -335,8 +335,8 @@ fail_free_new_fdt:
+ fail:
+       efi_free(fdt_size, fdt_addr);
+-
+-      efi_bs_call(free_pool, priv.runtime_map);
++      if (!efi_novamap)
++              efi_bs_call(free_pool, priv.runtime_map);
+       return EFI_LOAD_ERROR;
+ }
diff --git a/queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-6.6/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
new file mode 100644 (file)
index 0000000..c6c9cd4
--- /dev/null
@@ -0,0 +1,123 @@
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of
+interrupt affinity reconfiguration via procfs. Instead, the change is
+deferred until the next instance of the interrupt being triggered on the
+original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the
+interrupt is currently or used to be affine to the outgoing CPU.
+
+Additionally, reclaim the vector in __vector_schedule_cleanup() as well,
+following a warning message, although theoretically it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c |    9 ++++++---
+ kernel/irq/cpuhotplug.c       |   16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -1036,7 +1036,8 @@ static void __vector_schedule_cleanup(st
+                       add_timer_on(&cl->timer, cpu);
+               }
+       } else {
+-              apicd->prev_vector = 0;
++              pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++              free_moved_vector(apicd);
+       }
+       raw_spin_unlock(&vector_lock);
+ }
+@@ -1073,6 +1074,7 @@ void irq_complete_move(struct irq_cfg *c
+  */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++      unsigned int cpu = smp_processor_id();
+       struct apic_chip_data *apicd;
+       struct irq_data *irqd;
+       unsigned int vector;
+@@ -1097,10 +1099,11 @@ void irq_force_complete_move(struct irq_
+               goto unlock;
+       /*
+-       * If prev_vector is empty, no action required.
++       * If prev_vector is empty or the descriptor is neither currently
++       * nor previously on the outgoing CPU no action required.
+        */
+       vector = apicd->prev_vector;
+-      if (!vector)
++      if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+               goto unlock;
+       /*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+       }
+       /*
++       * Complete an eventually pending irq move cleanup. If this
++       * interrupt was moved in hard irq context, then the vectors need
++       * to be cleaned up. It can't wait until this interrupt actually
++       * happens and this CPU was involved.
++       */
++      irq_force_complete_move(desc);
++
++      /*
+        * No move required, if:
+        * - Interrupt is per cpu
+        * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+       }
+       /*
+-       * Complete an eventually pending irq move cleanup. If this
+-       * interrupt was moved in hard irq context, then the vectors need
+-       * to be cleaned up. It can't wait until this interrupt actually
+-       * happens and this CPU was involved.
+-       */
+-      irq_force_complete_move(desc);
+-
+-      /*
+        * If there is a setaffinity pending, then try to reuse the pending
+        * mask, so the last change of the affinity does not get lost. If
+        * there is no move pending or the pending mask does not contain
diff --git a/queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-6.6/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
new file mode 100644 (file)
index 0000000..c7b03d6
--- /dev/null
@@ -0,0 +1,74 @@
+From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001
+From: Gerd Hoffmann <kraxel@redhat.com>
+Date: Wed, 13 Mar 2024 13:58:42 +0100
+Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID
+
+From: Gerd Hoffmann <kraxel@redhat.com>
+
+commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream.
+
+Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16])
+to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths
+to userspace via KVM_GET_SUPPORTED_CPUID.
+
+Per AMD, GuestPhysBits is intended for software use, and physical CPUs do
+not set that field.  I.e. GuestPhysBits will be non-zero if and only if
+KVM is running as a nested hypervisor, and in that case, GuestPhysBits is
+NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with
+TDP enabled.
+
+E.g. KVM will soon use GuestPhysBits to communicate the CPU's maximum
+*addressable* guest physical address, which would result in KVM under-
+reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52,
+but without 5-level paging.
+
+Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com
+[sean: rewrite changelog with --verbose, Cc stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -1212,9 +1212,8 @@ static inline int __do_cpuid_func(struct
+               entry->eax = entry->ebx = entry->ecx = 0;
+               break;
+       case 0x80000008: {
+-              unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+-              unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+-              unsigned phys_as = entry->eax & 0xff;
++              unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
++              unsigned int phys_as;
+               /*
+                * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+@@ -1222,16 +1221,16 @@ static inline int __do_cpuid_func(struct
+                * reductions in MAXPHYADDR for memory encryption affect shadow
+                * paging, too.
+                *
+-               * If TDP is enabled but an explicit guest MAXPHYADDR is not
+-               * provided, use the raw bare metal MAXPHYADDR as reductions to
+-               * the HPAs do not affect GPAs.
++               * If TDP is enabled, use the raw bare metal MAXPHYADDR as
++               * reductions to the HPAs do not affect GPAs.
+                */
+-              if (!tdp_enabled)
+-                      g_phys_as = boot_cpu_data.x86_phys_bits;
+-              else if (!g_phys_as)
+-                      g_phys_as = phys_as;
++              if (!tdp_enabled) {
++                      phys_as = boot_cpu_data.x86_phys_bits;
++              } else {
++                      phys_as = entry->eax & 0xff;
++              }
+-              entry->eax = g_phys_as | (virt_as << 8);
++              entry->eax = phys_as | (virt_as << 8);
+               entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+               entry->edx = 0;
+               cpuid_entry_override(entry, CPUID_8000_0008_EBX);
diff --git a/queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch b/queue-6.6/platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
new file mode 100644 (file)
index 0000000..1bf9f5c
--- /dev/null
@@ -0,0 +1,42 @@
+From 2920141fc149f71bad22361946417bc43783ed7f Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Tue, 23 Apr 2024 13:46:10 -0700
+Subject: platform/x86/intel/tpmi: Handle error from tpmi_process_info()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 2920141fc149f71bad22361946417bc43783ed7f upstream.
+
+When tpmi_process_info() returns an error, fail to load the driver.
+This can happen if the call to ioremap() fails.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.3+
+Link: https://lore.kernel.org/r/20240423204619.3946901-2-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/tpmi.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/platform/x86/intel/tpmi.c
++++ b/drivers/platform/x86/intel/tpmi.c
+@@ -733,8 +733,11 @@ static int intel_vsec_tpmi_init(struct a
+                * when actual device nodes created outside this
+                * loop via tpmi_create_devices().
+                */
+-              if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID)
+-                      tpmi_process_info(tpmi_info, pfs);
++              if (pfs->pfs_header.tpmi_id == TPMI_INFO_ID) {
++                      ret = tpmi_process_info(tpmi_info, pfs);
++                      if (ret)
++                              return ret;
++              }
+               if (pfs->pfs_header.tpmi_id == TPMI_CONTROL_ID)
+                       tpmi_set_control_base(auxdev, tpmi_info, pfs);
diff --git a/queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch b/queue-6.6/platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
new file mode 100644 (file)
index 0000000..563892e
--- /dev/null
@@ -0,0 +1,50 @@
+From db643cb7ebe524d17b4b13583dda03485d4a1bc0 Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Mon, 15 Apr 2024 14:52:10 -0700
+Subject: platform/x86/intel-uncore-freq: Don't present root domain on error
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit db643cb7ebe524d17b4b13583dda03485d4a1bc0 upstream.
+
+If none of the clusters are added because of some error, fail to load
+the driver without presenting the root domain, since in this case the
+root domain would present invalid data.
+
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Fixes: 01c10f88c9b7 ("platform/x86/intel-uncore-freq: tpmi: Provide cluster level control")
+Cc: <stable@vger.kernel.org> # 6.5+
+Link: https://lore.kernel.org/r/20240415215210.2824868-1-srinivas.pandruvada@linux.intel.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+@@ -234,6 +234,7 @@ static int uncore_probe(struct auxiliary
+ {
+       struct intel_tpmi_plat_info *plat_info;
+       struct tpmi_uncore_struct *tpmi_uncore;
++      bool uncore_sysfs_added = false;
+       int ret, i, pkg = 0;
+       int num_resources;
+@@ -359,9 +360,15 @@ static int uncore_probe(struct auxiliary
+                       }
+                       /* Point to next cluster offset */
+                       cluster_offset >>= UNCORE_MAX_CLUSTER_PER_DOMAIN;
++                      uncore_sysfs_added = true;
+               }
+       }
++      if (!uncore_sysfs_added) {
++              ret = -ENODEV;
++              goto remove_clusters;
++      }
++
+       auxiliary_set_drvdata(auxdev, tpmi_uncore);
+       tpmi_uncore->root_cluster.root_domain = true;
index 665d092ca6fa41f93fdd27b8c2fad58b36cef418..8b647186560bf75ea4a4e9859d1e3adadd6c0aef 100644 (file)
@@ -548,8 +548,6 @@ iio-accel-mxc4005-allow-module-autoloading-via-of-co.patch
 iio-accel-mxc4005-reset-chip-on-probe-and-resume.patch
 misc-pvpanic-deduplicate-common-code.patch
 misc-pvpanic-pci-register-attributes-via-pci_driver.patch
-arm64-fpsimd-drop-unneeded-busy-flag.patch
-arm64-fpsimd-preserve-restore-kernel-mode-neon-at-co.patch
 arm64-fpsimd-implement-lazy-restore-for-kernel-mode-.patch
 revert-arm64-fpsimd-implement-lazy-restore-for-kerne.patch
 arm64-fpsimd-avoid-erroneous-elide-of-user-state-rel.patch
@@ -741,3 +739,10 @@ hwmon-shtc1-fix-property-misspelling.patch
 riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch
 alsa-seq-ump-fix-swapped-song-position-pointer-data.patch
 alsa-timer-set-lower-bound-of-start-tick-time.patch
+x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
+efi-libstub-only-free-priv.runtime_map-when-allocated.patch
+x86-pci-skip-early-e820-check-for-ecam-region.patch
+kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch
+genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch
+platform-x86-intel-tpmi-handle-error-from-tpmi_process_info.patch
+platform-x86-intel-uncore-freq-don-t-present-root-domain-on-error.patch
diff --git a/queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch b/queue-6.6/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch
new file mode 100644 (file)
index 0000000..81ddf45
--- /dev/null
@@ -0,0 +1,107 @@
+From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 16 May 2024 11:05:42 +0200
+Subject: x86/efistub: Omit physical KASLR when memory reservations exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream.
+
+The legacy decompressor has elaborate logic to ensure that the
+randomized physical placement of the decompressed kernel image does not
+conflict with any memory reservations, including ones specified on the
+command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are
+taken into account by the kernel proper at a later stage.
+
+When booting in EFI mode, it is the firmware's job to ensure that the
+chosen range does not conflict with any memory reservations that it
+knows about, and this is trivially achieved by using the firmware's
+memory allocation APIs.
+
+That leaves reservations specified on the command line, though, which
+the firmware knows nothing about, as these regions have no other special
+significance to the platform. Since commit
+
+  a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+
+these reservations are not taken into account when randomizing the
+physical placement, which may result in conflicts where the memory
+cannot be reserved by the kernel proper because its own executable image
+resides there.
+
+To avoid having to duplicate or reuse the existing complicated logic,
+disable physical KASLR entirely when such overrides are specified. These
+are mostly diagnostic tools or niche features, and physical KASLR (as
+opposed to virtual KASLR, which is much more important as it affects the
+memory addresses observed by code executing in the kernel) is something
+we can live without.
+
+Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com
+Reported-by: Ben Chaney <bchaney@akamai.com>
+Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
+Cc:  <stable@vger.kernel.org> # v6.1+
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/x86-stub.c |   28 ++++++++++++++++++++++++++--
+ 1 file changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -776,6 +776,26 @@ static void error(char *str)
+       efi_warn("Decompression failed: %s\n", str);
+ }
++static const char *cmdline_memmap_override;
++
++static efi_status_t parse_options(const char *cmdline)
++{
++      static const char opts[][14] = {
++              "mem=", "memmap=", "efi_fake_mem=", "hugepages="
++      };
++
++      for (int i = 0; i < ARRAY_SIZE(opts); i++) {
++              const char *p = strstr(cmdline, opts[i]);
++
++              if (p == cmdline || (p > cmdline && isspace(p[-1]))) {
++                      cmdline_memmap_override = opts[i];
++                      break;
++              }
++      }
++
++      return efi_parse_options(cmdline);
++}
++
+ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+ {
+       unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+@@ -807,6 +827,10 @@ static efi_status_t efi_decompress_kerne
+                   !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
+                       efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n");
+                       seed[0] = 0;
++              } else if (cmdline_memmap_override) {
++                      efi_info("%s detected on the kernel command line - disabling physical KASLR\n",
++                               cmdline_memmap_override);
++                      seed[0] = 0;
+               }
+       }
+@@ -881,7 +905,7 @@ void __noreturn efi_stub_entry(efi_handl
+       }
+ #ifdef CONFIG_CMDLINE_BOOL
+-      status = efi_parse_options(CONFIG_CMDLINE);
++      status = parse_options(CONFIG_CMDLINE);
+       if (status != EFI_SUCCESS) {
+               efi_err("Failed to parse options\n");
+               goto fail;
+@@ -890,7 +914,7 @@ void __noreturn efi_stub_entry(efi_handl
+       if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+               unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+                                              ((u64)boot_params->ext_cmd_line_ptr << 32));
+-              status = efi_parse_options((char *)cmdline_paddr);
++              status = parse_options((char *)cmdline_paddr);
+               if (status != EFI_SUCCESS) {
+                       efi_err("Failed to parse options\n");
+                       goto fail;
diff --git a/queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch b/queue-6.6/x86-pci-skip-early-e820-check-for-ecam-region.patch
new file mode 100644 (file)
index 0000000..5cae82a
--- /dev/null
@@ -0,0 +1,131 @@
+From 199f968f1484a14024d0d467211ffc2faf193eb4 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Wed, 17 Apr 2024 15:40:12 -0500
+Subject: x86/pci: Skip early E820 check for ECAM region
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 199f968f1484a14024d0d467211ffc2faf193eb4 upstream.
+
+Arul, Mateusz, Imcarneiro91, and Aman reported a regression caused by
+07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map").  On the
+Lenovo Legion 9i laptop, that commit removes the ECAM area from E820, which
+means the early E820 validation fails, which means we don't enable ECAM in
+the "early MCFG" path.
+
+The static MCFG table describes ECAM without depending on the ACPI
+interpreter.  Many Legion 9i ACPI methods rely on that, so they fail when
+PCI config access isn't available, resulting in the embedded controller,
+PS/2, audio, trackpad, and battery devices not being detected.  The _OSC
+method also fails, so Linux can't take control of the PCIe hotplug, PME,
+and AER features:
+
+  # pci_mmcfg_early_init()
+
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+  PCI: not using ECAM ([mem 0xc0000000-0xce0fffff] not reserved)
+
+  ACPI Error: AE_ERROR, Returned by Handler for [PCI_Config] (20230628/evregion-300)
+  ACPI: Interpreter enabled
+  ACPI: Ignoring error and continuing table load
+  ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+  ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+  ACPI: Skipping parse of AML opcode: OpcodeName unavailable (0x0010)
+  ACPI BIOS Error (bug): Could not resolve symbol [\_SB.PC00.RP01._SB.PC00], AE_NOT_FOUND (20230628/dswload2-162)
+  ACPI Error: AE_NOT_FOUND, During name lookup/catalog (20230628/psobject-220)
+  ...
+  ACPI Error: Aborting method \_SB.PC00._OSC due to previous error (AE_NOT_FOUND) (20230628/psparse-529)
+  acpi PNP0A08:00: _OSC: platform retains control of PCIe features (AE_NOT_FOUND)
+
+  # pci_mmcfg_late_init()
+
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] (base 0xc0000000) for domain 0000 [bus 00-e0]
+  PCI: [Firmware Info]: ECAM [mem 0xc0000000-0xce0fffff] not reserved in ACPI motherboard resources
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] is EfiMemoryMappedIO; assuming valid
+  PCI: ECAM [mem 0xc0000000-0xce0fffff] reserved to work around lack of ACPI motherboard _CRS
+
+Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be reserved by a PNP0C02
+resource, but there's no requirement to mention it in E820, so we shouldn't
+look at E820 to validate the ECAM space described by MCFG.
+
+In 2006, 946f2ee5c731 ("[PATCH] i386/x86-64: Check that MCFG points to an
+e820 reserved area") added a sanity check of E820 to work around buggy MCFG
+tables, but that over-aggressive validation causes failures like this one.
+
+Keep the E820 validation check for machines older than 2016, an arbitrary
+ten years after 946f2ee5c731, so machines that depend on it don't break.
+
+Skip the early E820 check for 2016 and newer BIOSes since there's no
+requirement to describe ECAM in E820.
+
+Link: https://lore.kernel.org/r/20240417204012.215030-2-helgaas@kernel.org
+Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map")
+Reported-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218444
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Tested-by: Mateusz Kaduk <mateusz.kaduk@gmail.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/mmconfig-shared.c |   40 +++++++++++++++++++++++++++++-----------
+ 1 file changed, 29 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/pci/mmconfig-shared.c
++++ b/arch/x86/pci/mmconfig-shared.c
+@@ -527,7 +527,34 @@ pci_mmcfg_check_reserved(struct device *
+ {
+       struct resource *conflict;
+-      if (!early && !acpi_disabled) {
++      if (early) {
++
++              /*
++               * Don't try to do this check unless configuration type 1
++               * is available.  How about type 2?
++               */
++
++              /*
++               * 946f2ee5c731 ("Check that MCFG points to an e820
++               * reserved area") added this E820 check in 2006 to work
++               * around BIOS defects.
++               *
++               * Per PCI Firmware r3.3, sec 4.1.2, ECAM space must be
++               * reserved by a PNP0C02 resource, but it need not be
++               * mentioned in E820.  Before the ACPI interpreter is
++               * available, we can't check for PNP0C02 resources, so
++               * there's no reliable way to verify the region in this
++               * early check.  Keep it only for the old machines that
++               * motivated 946f2ee5c731.
++               */
++              if (dmi_get_bios_year() < 2016 && raw_pci_ops)
++                      return is_mmconf_reserved(e820__mapped_all, cfg, dev,
++                                                "E820 entry");
++
++              return true;
++      }
++
++      if (!acpi_disabled) {
+               if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+                                      "ACPI motherboard resource"))
+                       return true;
+@@ -563,16 +590,7 @@ pci_mmcfg_check_reserved(struct device *
+        * For MCFG information constructed from hotpluggable host bridge's
+        * _CBA method, just assume it's reserved.
+        */
+-      if (pci_mmcfg_running_state)
+-              return true;
+-
+-      /* Don't try to do this check unless configuration
+-         type 1 is available. how about type 2 ?*/
+-      if (raw_pci_ops)
+-              return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+-                                        "E820 entry");
+-
+-      return false;
++      return pci_mmcfg_running_state;
+ }
+ static void __init pci_mmcfg_reject_broken(int early)