--- /dev/null
+From 5858b687559809f05393af745cbadf06dee61295 Mon Sep 17 00:00:00 2001
+From: "Chunguang.xu" <chunguang.xu@shopee.com>
+Date: Tue, 3 Dec 2024 11:34:41 +0800
+Subject: nvme-rdma: unquiesce admin_q before destroy it
+
+From: Chunguang.xu <chunguang.xu@shopee.com>
+
+commit 5858b687559809f05393af745cbadf06dee61295 upstream.
+
+The kernel will hang on destroying the admin_q if we failed to create
+the ctrl, as shown in the following calltrace:
+
+PID: 23644 TASK: ff2d52b40f439fc0 CPU: 2 COMMAND: "nvme"
+ #0 [ff61d23de260fb78] __schedule at ffffffff8323bc15
+ #1 [ff61d23de260fc08] schedule at ffffffff8323c014
+ #2 [ff61d23de260fc28] blk_mq_freeze_queue_wait at ffffffff82a3dba1
+ #3 [ff61d23de260fc78] blk_freeze_queue at ffffffff82a4113a
+ #4 [ff61d23de260fc90] blk_cleanup_queue at ffffffff82a33006
+ #5 [ff61d23de260fcb0] nvme_rdma_destroy_admin_queue at ffffffffc12686ce
+ #6 [ff61d23de260fcc8] nvme_rdma_setup_ctrl at ffffffffc1268ced
+ #7 [ff61d23de260fd28] nvme_rdma_create_ctrl at ffffffffc126919b
+ #8 [ff61d23de260fd68] nvmf_dev_write at ffffffffc024f362
+ #9 [ff61d23de260fe38] vfs_write at ffffffff827d5f25
+ RIP: 00007fda7891d574 RSP: 00007ffe2ef06958 RFLAGS: 00000202
+ RAX: ffffffffffffffda RBX: 000055e8122a4d90 RCX: 00007fda7891d574
+ RDX: 000000000000012b RSI: 000055e8122a4d90 RDI: 0000000000000004
+ RBP: 00007ffe2ef079c0 R8: 000000000000012b R9: 000055e8122a4d90
+ R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000004
+ R13: 000055e8122923c0 R14: 000000000000012b R15: 00007fda78a54500
+ ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
+
+This is because we quiesced the admin_q before cancelling requests but
+forgot to unquiesce it before destroying it; as a result we fail to
+drain the pending requests and hang in blk_mq_freeze_queue_wait()
+forever. Reuse nvme_rdma_teardown_admin_queue() to fix this issue and
+simplify the code.
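+
+For reference, a rough sketch of what the reused helper does, modeled
+on the upstream nvme_rdma_teardown_admin_queue() (exact contents vary
+by kernel version); the key point is that the queue is unquiesced
+before it is destroyed, so the pending requests can be drained:
+
+  static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
+                                             bool remove)
+  {
+          nvme_quiesce_admin_queue(&ctrl->ctrl);
+          blk_sync_queue(ctrl->ctrl.admin_q);
+          nvme_rdma_stop_queue(&ctrl->queues[0]);
+          nvme_cancel_admin_tagset(&ctrl->ctrl);
+          if (remove) {
+                  /* the step the failed-setup path was missing */
+                  nvme_unquiesce_admin_queue(&ctrl->ctrl);
+                  nvme_remove_admin_tag_set(&ctrl->ctrl);
+          }
+          nvme_rdma_destroy_admin_queue(ctrl);
+  }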
+
+Fixes: 958dc1d32c80 ("nvme-rdma: add clean action for failed reconnection")
+Reported-by: Yingfu.zhou <yingfu.zhou@shopee.com>
+Signed-off-by: Chunguang.xu <chunguang.xu@shopee.com>
+Signed-off-by: Yue.zhao <yue.zhao@shopee.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+[Minor context change fixed]
+Signed-off-by: Feng Liu <Feng.Liu3@windriver.com>
+Signed-off-by: He Zhe <Zhe.He@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/host/rdma.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/drivers/nvme/host/rdma.c
++++ b/drivers/nvme/host/rdma.c
+@@ -1083,13 +1083,7 @@ destroy_io:
+ nvme_rdma_free_io_queues(ctrl);
+ }
+ destroy_admin:
+- nvme_quiesce_admin_queue(&ctrl->ctrl);
+- blk_sync_queue(ctrl->ctrl.admin_q);
+- nvme_rdma_stop_queue(&ctrl->queues[0]);
+- nvme_cancel_admin_tagset(&ctrl->ctrl);
+- if (new)
+- nvme_remove_admin_tag_set(&ctrl->ctrl);
+- nvme_rdma_destroy_admin_queue(ctrl);
++ nvme_rdma_teardown_admin_queue(ctrl, new);
+ return ret;
+ }
+
--- /dev/null
+From 0974d03eb479384466d828d65637814bee6b26d7 Mon Sep 17 00:00:00 2001
+From: Nathan Lynch <nathanl@linux.ibm.com>
+Date: Thu, 30 May 2024 19:44:12 -0500
+Subject: powerpc/rtas: Prevent Spectre v1 gadget construction in sys_rtas()
+
+From: Nathan Lynch <nathanl@linux.ibm.com>
+
+commit 0974d03eb479384466d828d65637814bee6b26d7 upstream.
+
+Smatch warns:
+
+ arch/powerpc/kernel/rtas.c:1932 __do_sys_rtas() warn: potential
+ spectre issue 'args.args' [r] (local cap)
+
+The 'nargs' and 'nret' locals come directly from a user-supplied
+buffer and are used as indexes into a small stack-based array and as
+inputs to copy_to_user() after they are subject to bounds checks.
+
+Use array_index_nospec() after the bounds checks to clamp these values
+for speculative execution.
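+
+The resulting pattern in sys_rtas() then looks roughly like the sketch
+below (simplified; the existing bounds checks are kept and the clamping
+is added right after them):
+
+  if (nargs >= ARRAY_SIZE(args.args)
+      || nret > ARRAY_SIZE(args.args)
+      || nargs + nret > ARRAY_SIZE(args.args))
+          return -EINVAL;
+
+  nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args));
+  nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs);
+
+  /* Copy in args. */
+  if (copy_from_user(args.args, uargs->args,
+                     nargs * sizeof(rtas_arg_t)) != 0)
+          return -EFAULT;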
+
+Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
+Reported-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Breno Leitao <leitao@debian.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240530-sys_rtas-nargs-nret-v1-1-129acddd4d89@linux.ibm.com
+[Minor context change fixed]
+Signed-off-by: Cliff Liu <donghua.liu@windriver.com>
+Signed-off-by: He Zhe <Zhe.He@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/rtas.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/powerpc/kernel/rtas.c
++++ b/arch/powerpc/kernel/rtas.c
+@@ -18,6 +18,7 @@
+ #include <linux/kernel.h>
+ #include <linux/lockdep.h>
+ #include <linux/memblock.h>
++#include <linux/nospec.h>
+ #include <linux/of.h>
+ #include <linux/of_fdt.h>
+ #include <linux/reboot.h>
+@@ -1839,6 +1840,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args _
+ || nargs + nret > ARRAY_SIZE(args.args))
+ return -EINVAL;
+
++ nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args));
++ nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs);
++
+ /* Copy in args. */
+ if (copy_from_user(args.args, uargs->args,
+ nargs * sizeof(rtas_arg_t)) != 0)
efi-libstub-bump-up-efi_mmap_nr_slack_slots-to-32.patch
x86-xen-move-xen_reserve_extra_memory.patch
x86-xen-fix-memblock_reserve-usage-on-pvh.patch
+x86-tdx-fix-arch_safe_halt-execution-for-tdx-vms.patch
+x86-split_lock-fix-the-delayed-detection-logic.patch
+nvme-rdma-unquiesce-admin_q-before-destroy-it.patch
+powerpc-rtas-prevent-spectre-v1-gadget-construction-in-sys_rtas.patch
--- /dev/null
+From c929d08df8bee855528b9d15b853c892c54e1eee Mon Sep 17 00:00:00 2001
+From: Maksim Davydov <davydov-max@yandex-team.ru>
+Date: Wed, 15 Jan 2025 16:17:04 +0300
+Subject: x86/split_lock: Fix the delayed detection logic
+
+From: Maksim Davydov <davydov-max@yandex-team.ru>
+
+commit c929d08df8bee855528b9d15b853c892c54e1eee upstream.
+
+If the warning mode is used with the mitigation mode disabled, then on
+each CPU where a split lock occurs detection will be disabled in order
+to make progress, and delayed work will be scheduled to re-enable
+detection later.
+
+It turns out that all CPUs use one global delayed work structure. As a
+result, if split locks occur on several CPUs at the same time (within
+2 jiffies), only one CPU will schedule the delayed work, and the rest
+will not.
+
+The return value of schedule_delayed_work_on() would have shown this,
+but it is not checked in the code.
+
+A diagram that can help to understand how the bug is reproduced:
+
+ - sld_update_msr() enables/disables SLD on both CPUs on the same core
+
+ - schedule_delayed_work_on() internally checks WORK_STRUCT_PENDING_BIT.
+   If the work is already in the 'pending' state, then
+   schedule_delayed_work_on() will return false and, most importantly,
+   the work will not be placed in the workqueue (see the sketch right
+   after this list).
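+
+A minimal model of that check (not the literal workqueue code, just the
+behaviour split_lock_warn() relies on):
+
+  /* A second call with the same, still pending, dwork is a no-op. */
+  static bool queue_delayed_work_model(int cpu, struct delayed_work *dwork,
+                                       unsigned long delay)
+  {
+          if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+                               work_data_bits(&dwork->work)))
+                  return false;   /* already pending, nothing is queued */
+
+          /* ...otherwise arm the timer that queues dwork->work on 'cpu'... */
+          return true;
+  }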
+
+Let's say we have a multicore system with split_lock_mitigate=0 and a
+multithreaded application running that triggers split locks in multiple
+threads. Because sld_update_msr() affects the entire core (both CPUs),
+we will consider 2 CPUs from different cores. Let the 2 threads of this
+application be scheduled to CPU 0 (core 0) and to CPU 2 (core 1), then:
+
+|                                 ||                                   |
+|          CPU 0 (core 0)         ||          CPU 2 (core 1)           |
+|_________________________________||___________________________________|
+|                                 ||                                   |
+|   1) SPLIT LOCK occurred        ||                                   |
+|                                 ||                                   |
+|   2) split_lock_warn()          ||                                   |
+|                                 ||                                   |
+|   3) sysctl_sld_mitigate == 0   ||                                   |
+|      (work = &sl_reenable)      ||                                   |
+|                                 ||                                   |
+|   4) schedule_delayed_work_on() ||                                   |
+|      (reenable will be called   ||                                   |
+|       after 2 jiffies on CPU 0) ||                                   |
+|                                 ||                                   |
+|   5) disable SLD for core 0     ||                                   |
+|                                 ||                                   |
+|   -------------------------     ||                                   |
+|                                 ||                                   |
+|                                 ||   6) SPLIT LOCK occurred          |
+|                                 ||                                   |
+|                                 ||   7) split_lock_warn()            |
+|                                 ||                                   |
+|                                 ||   8) sysctl_sld_mitigate == 0     |
+|                                 ||      (work = &sl_reenable,        |
+|                                 ||       the same address as in 3) ) |
+|                                 ||                                   |
+|             2 jiffies           ||   9) schedule_delayed_work_on()   |
+|                                 ||      fails because the work is in |
+|                                 ||      the pending state since 4).  |
+|                                 ||      The work wasn't placed to the|
+|                                 ||      workqueue. reenable won't be |
+|                                 ||      called on CPU 2              |
+|                                 ||                                   |
+|                                 ||   10) disable SLD for core 1      |
+|                                 ||                                   |
+|                                 ||      From now on SLD will         |
+|                                 ||      never be reenabled on core 1 |
+|                                 ||                                   |
+|   -------------------------     ||                                   |
+|                                 ||                                   |
+|   11) enable SLD for core 0 by  ||                                   |
+|      __split_lock_reenable      ||                                   |
+|                                 ||                                   |
+
+If the application threads can be scheduled to all processor cores,
+then over time only one core will be left on which SLD is enabled and
+split locks can still be detected; on all other cores SLD will stay
+disabled.
+
+Most likely, this bug has gone unnoticed for so long because the
+default value of sysctl_sld_mitigate is 1, and in that case a semaphore
+is used which does not allow 2 different cores to have SLD disabled at
+the same time, i.e. strictly one work at most is placed in the
+workqueue.
+
+In order to fix the warning mode with disabled mitigation mode,
+delayed work has to be per-CPU. Implement it.
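+
+Condensed, the shape of the change is the sketch below (the hunk that
+follows has the real code, including the lazy INIT_DELAYED_WORK of the
+per-CPU entries):
+
+  static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
+  ...
+  cpu = get_cpu();
+  work = this_cpu_ptr(&sl_reenable);      /* was: one global delayed work */
+  schedule_delayed_work_on(cpu, work, 2); /* now pending per CPU, not globally */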
+
+Fixes: 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode")
+Signed-off-by: Maksim Davydov <davydov-max@yandex-team.ru>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ravi Bangoria <ravi.bangoria@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20250115131704.132609-1-davydov-max@yandex-team.ru
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel.c | 20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -1168,7 +1168,13 @@ static void __split_lock_reenable(struct
+ {
+ sld_update_msr(true);
+ }
+-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
++/*
++ * In order for each CPU to schedule its delayed work independently of the
++ * others, delayed work struct must be per-CPU. This is not required when
++ * sysctl_sld_mitigate is enabled because of the semaphore that limits
++ * the number of simultaneously scheduled delayed works to 1.
++ */
++static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
+
+ /*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+@@ -1189,7 +1195,7 @@ static int splitlock_cpu_offline(unsigne
+
+ static void split_lock_warn(unsigned long ip)
+ {
+- struct delayed_work *work;
++ struct delayed_work *work = NULL;
+ int cpu;
+
+ if (!current->reported_split_lock)
+@@ -1211,11 +1217,17 @@ static void split_lock_warn(unsigned lon
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ work = &sl_reenable_unlock;
+- } else {
+- work = &sl_reenable;
+ }
+
+ cpu = get_cpu();
++
++ if (!work) {
++ work = this_cpu_ptr(&sl_reenable);
++ /* Deferred initialization of per-CPU struct */
++ if (!work->work.func)
++ INIT_DELAYED_WORK(work, __split_lock_reenable);
++ }
++
+ schedule_delayed_work_on(cpu, work, 2);
+
+ /* Disable split lock detection on this CPU to make progress */
--- /dev/null
+From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001
+From: Vishal Annapurve <vannapurve@google.com>
+Date: Fri, 28 Feb 2025 01:44:15 +0000
+Subject: x86/tdx: Fix arch_safe_halt() execution for TDX VMs
+
+From: Vishal Annapurve <vannapurve@google.com>
+
+commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 upstream.
+
+Direct HLT instruction execution causes #VEs for TDX VMs, which are
+routed to the hypervisor via TDCALL. If HLT is executed in the
+STI-shadow, the resulting #VE handler will enable interrupts before the
+TDCALL is routed to the hypervisor, leading to missed wakeup events, as
+the current TDX spec doesn't expose interruptibility state information
+to allow the #VE handler to selectively enable interrupts.
+
+Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
+prevented the idle routines from executing the HLT instruction in the
+STI-shadow. But it missed the paravirt routine which can be reached,
+for example, via this path:
+
+ kvm_wait() =>
+ safe_halt() =>
+ raw_safe_halt() =>
+ arch_safe_halt() =>
+ irq.safe_halt() =>
+ pv_native_safe_halt()
+
+To reliably handle arch_safe_halt() for TDX VMs, introduce an explicit
+dependency on CONFIG_PARAVIRT and override the paravirt halt()/safe_halt()
+routines with TDX-safe versions that execute a direct TDCALL and the
+needed interrupt flag updates. Executing a direct TDCALL brings the
+additional benefit of avoiding HLT-related #VEs altogether.
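+
+Hooking pv_ops covers every caller because, with CONFIG_PARAVIRT,
+arch_safe_halt()/halt() are indirect calls through pv_ops; a rough
+sketch of the paravirt plumbing (not the literal headers):
+
+  static inline void arch_safe_halt(void)
+  {
+          PVOP_VCALL0(irq.safe_halt); /* tdx_safe_halt() after tdx_early_init() */
+  }
+
+  static inline void halt(void)
+  {
+          PVOP_VCALL0(irq.halt);      /* tdx_halt() after tdx_early_init() */
+  }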
+
+As tested by Ryan Afranji:
+
+ "Tested with the specjbb2015 benchmark. It has heavy lock contention which leads
+ to many halt calls. TDX VMs suffered a poor score before this patchset.
+
+ Verified the major performance improvement with this patchset applied."
+
+Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
+Signed-off-by: Vishal Annapurve <vannapurve@google.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Tested-by: Ryan Afranji <afranji@google.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig | 1 +
+ arch/x86/coco/tdx/tdx.c | 26 +++++++++++++++++++++++++-
+ arch/x86/include/asm/tdx.h | 4 ++--
+ arch/x86/kernel/process.c | 2 +-
+ 4 files changed, 29 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -881,6 +881,7 @@ config INTEL_TDX_GUEST
+ depends on X86_64 && CPU_SUP_INTEL
+ depends on X86_X2APIC
+ depends on EFI_STUB
++ depends on PARAVIRT
+ select ARCH_HAS_CC_PLATFORM
+ select X86_MEM_ENCRYPT
+ select X86_MCE
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -13,6 +13,7 @@
+ #include <asm/ia32.h>
+ #include <asm/insn.h>
+ #include <asm/insn-eval.h>
++#include <asm/paravirt_types.h>
+ #include <asm/pgtable.h>
+ #include <asm/traps.h>
+
+@@ -334,7 +335,7 @@ static int handle_halt(struct ve_info *v
+ return ve_instr_len(ve);
+ }
+
+-void __cpuidle tdx_safe_halt(void)
++void __cpuidle tdx_halt(void)
+ {
+ const bool irq_disabled = false;
+
+@@ -345,6 +346,16 @@ void __cpuidle tdx_safe_halt(void)
+ WARN_ONCE(1, "HLT instruction emulation failed\n");
+ }
+
++static void __cpuidle tdx_safe_halt(void)
++{
++ tdx_halt();
++ /*
++ * "__cpuidle" section doesn't support instrumentation, so stick
++ * with raw_* variant that avoids tracing hooks.
++ */
++ raw_local_irq_enable();
++}
++
+ static int read_msr(struct pt_regs *regs, struct ve_info *ve)
+ {
+ struct tdx_hypercall_args args = {
+@@ -889,6 +900,19 @@ void __init tdx_early_init(void)
+ x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+
+ /*
++ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
++ * will enable interrupts before HLT TDCALL invocation if executed
++ * in STI-shadow, possibly resulting in missed wakeup events.
++ *
++ * Modify all possible HLT execution paths to use TDX specific routines
++ * that directly execute TDCALL and toggle the interrupt state as
++ * needed after TDCALL completion. This also reduces HLT related #VEs
++ * in addition to having a reliable halt logic execution.
++ */
++ pv_ops.irq.safe_halt = tdx_safe_halt;
++ pv_ops.irq.halt = tdx_halt;
++
++ /*
+ * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+ * bringup low level code. That raises #VE which cannot be handled
+ * there.
+--- a/arch/x86/include/asm/tdx.h
++++ b/arch/x86/include/asm/tdx.h
+@@ -46,7 +46,7 @@ void tdx_get_ve_info(struct ve_info *ve)
+
+ bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+-void tdx_safe_halt(void);
++void tdx_halt(void);
+
+ bool tdx_early_handle_ve(struct pt_regs *regs);
+
+@@ -55,7 +55,7 @@ int tdx_mcall_get_report0(u8 *reportdata
+ #else
+
+ static inline void tdx_early_init(void) { };
+-static inline void tdx_safe_halt(void) { };
++static inline void tdx_halt(void) { };
+
+ static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -955,7 +955,7 @@ void select_idle_routine(const struct cp
+ static_call_update(x86_idle, mwait_idle);
+ } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ pr_info("using TDX aware idle routine\n");
+- static_call_update(x86_idle, tdx_safe_halt);
++ static_call_update(x86_idle, tdx_halt);
+ } else
+ static_call_update(x86_idle, default_idle);
+ }